aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ubifs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ubifs')
-rw-r--r--fs/ubifs/budget.c114
-rw-r--r--fs/ubifs/debug.c2
-rw-r--r--fs/ubifs/dir.c3
-rw-r--r--fs/ubifs/file.c20
-rw-r--r--fs/ubifs/find.c19
-rw-r--r--fs/ubifs/gc.c20
-rw-r--r--fs/ubifs/misc.h49
-rw-r--r--fs/ubifs/super.c25
-rw-r--r--fs/ubifs/tnc.c116
-rw-r--r--fs/ubifs/ubifs-media.h2
-rw-r--r--fs/ubifs/ubifs.h14
11 files changed, 235 insertions, 149 deletions
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index 154098157473..73db464cd08b 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -302,18 +302,6 @@ long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs)
302 int subtract_lebs; 302 int subtract_lebs;
303 long long available; 303 long long available;
304 304
305 /*
306 * Force the amount available to the total size reported if the used
307 * space is zero.
308 */
309 if (c->lst.total_used <= UBIFS_INO_NODE_SZ &&
310 c->budg_data_growth + c->budg_dd_growth == 0) {
311 /* Do the same calculation as for c->block_cnt */
312 available = c->main_lebs - 2;
313 available *= c->leb_size - c->dark_wm;
314 return available;
315 }
316
317 available = c->main_bytes - c->lst.total_used; 305 available = c->main_bytes - c->lst.total_used;
318 306
319 /* 307 /*
@@ -714,34 +702,106 @@ void ubifs_release_dirty_inode_budget(struct ubifs_info *c,
714} 702}
715 703
716/** 704/**
717 * ubifs_budg_get_free_space - return amount of free space. 705 * ubifs_reported_space - calculate reported free space.
706 * @c: the UBIFS file-system description object
707 * @free: amount of free space
708 *
709 * This function calculates amount of free space which will be reported to
710 * user-space. User-space applications tend to expect that if the file-system
711 * (e.g., via the 'statfs()' call) reports that it has N bytes available, they
712 * are able to write a file of size N. UBIFS attaches node headers to each data
713 * node and it has to write indexing nodes as well. This introduces additional
714 * overhead, and UBIFS has to report slightly less free space to meet the
715 * above expectation.
716 *
717 * This function assumes free space is made up of uncompressed data nodes and
718 * full index nodes (one per data node, tripled because we always allow enough
719 * space to write the index thrice).
720 *
721 * Note, the calculation is pessimistic, which means that most of the time
722 * UBIFS reports less space than it actually has.
723 */
724long long ubifs_reported_space(const struct ubifs_info *c, uint64_t free)
725{
726 int divisor, factor, f;
727
728 /*
729 * Reported space size is @free * X, where X is UBIFS block size
730 * divided by UBIFS block size + all overhead one data block
731 * introduces. The overhead is the node header + indexing overhead.
732 *
733 * Indexing overhead calculations are based on the following formula:
734 * I = N/(f - 1) + 1, where I - number of indexing nodes, N - number
735 * of data nodes, f - fanout. Because the effective UBIFS fanout is half
736 * the maximum fanout, we assume that each data node
737 * introduces 3 * @c->max_idx_node_sz / (@c->fanout/2 - 1) bytes.
738 * Note, the multiplier 3 is because UBIFS reserves three times as much space
739 * for the index.
740 */
741 f = c->fanout > 3 ? c->fanout >> 1 : 2;
742 factor = UBIFS_BLOCK_SIZE;
743 divisor = UBIFS_MAX_DATA_NODE_SZ;
744 divisor += (c->max_idx_node_sz * 3) / (f - 1);
745 free *= factor;
746 do_div(free, divisor);
747 return free;
748}
749
750/**
751 * ubifs_get_free_space - return amount of free space.
718 * @c: UBIFS file-system description object 752 * @c: UBIFS file-system description object
719 * 753 *
720 * This function returns amount of free space on the file-system. 754 * This function calculates amount of free space to report to user-space.
755 *
756 * Because UBIFS may introduce substantial overhead (the index, node headers,
757 * alignment, wastage at the end of eraseblocks, etc), it cannot report the real
758 * amount of free flash space it has (well, because not all dirty space is
759 * reclaimable, UBIFS does not actually know the real amount). If UBIFS did so,
760 * it would break user expectations about what free space is. Users seem to be
761 * accustomed to assume that if the file-system reports N bytes of free space,
762 * they would be able to fit a file of N bytes to the FS. This almost works for
763 * traditional file-systems, because they have way less overhead than UBIFS.
764 * So, to keep users happy, UBIFS tries to take the overhead into account.
721 */ 765 */
722long long ubifs_budg_get_free_space(struct ubifs_info *c) 766long long ubifs_get_free_space(struct ubifs_info *c)
723{ 767{
724 int min_idx_lebs, rsvd_idx_lebs; 768 int min_idx_lebs, rsvd_idx_lebs, lebs;
725 long long available, outstanding, free; 769 long long available, outstanding, free;
726 770
727 /* Do exactly the same calculations as in 'do_budget_space()' */
728 spin_lock(&c->space_lock); 771 spin_lock(&c->space_lock);
729 min_idx_lebs = ubifs_calc_min_idx_lebs(c); 772 min_idx_lebs = ubifs_calc_min_idx_lebs(c);
773 outstanding = c->budg_data_growth + c->budg_dd_growth;
730 774
731 if (min_idx_lebs > c->lst.idx_lebs) 775 /*
732 rsvd_idx_lebs = min_idx_lebs - c->lst.idx_lebs; 776 * Force the amount available to the total size reported if the used
733 else 777 * space is zero.
734 rsvd_idx_lebs = 0; 778 */
735 779 if (c->lst.total_used <= UBIFS_INO_NODE_SZ && !outstanding) {
736 if (rsvd_idx_lebs > c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt
737 - c->lst.taken_empty_lebs) {
738 spin_unlock(&c->space_lock); 780 spin_unlock(&c->space_lock);
739 return 0; 781 return (long long)c->block_cnt << UBIFS_BLOCK_SHIFT;
740 } 782 }
741 783
742 available = ubifs_calc_available(c, min_idx_lebs); 784 available = ubifs_calc_available(c, min_idx_lebs);
743 outstanding = c->budg_data_growth + c->budg_dd_growth; 785
744 c->min_idx_lebs = min_idx_lebs; 786 /*
787 * When reporting free space to user-space, UBIFS guarantees that it is
788 * possible to write a file of free space size. This means that for
789 * empty LEBs we may use more precise calculations than
790 * 'ubifs_calc_available()' is using. Namely, we know that in empty
791 * LEBs we would waste only @c->leb_overhead bytes, not @c->dark_wm.
792 * Thus, amend the available space.
793 *
794 * Note, the calculations below are similar to what we have in
795 * 'do_budget_space()', so refer there for comments.
796 */
797 if (min_idx_lebs > c->lst.idx_lebs)
798 rsvd_idx_lebs = min_idx_lebs - c->lst.idx_lebs;
799 else
800 rsvd_idx_lebs = 0;
801 lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt -
802 c->lst.taken_empty_lebs;
803 lebs -= rsvd_idx_lebs;
804 available += lebs * (c->dark_wm - c->leb_overhead);
745 spin_unlock(&c->space_lock); 805 spin_unlock(&c->space_lock);
746 806
747 if (available > outstanding) 807 if (available > outstanding)
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index b9cb77473758..d7f7645779f2 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -538,7 +538,7 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
538 printk(KERN_DEBUG "\t%d orphan inode numbers:\n", n); 538 printk(KERN_DEBUG "\t%d orphan inode numbers:\n", n);
539 for (i = 0; i < n; i++) 539 for (i = 0; i < n; i++)
540 printk(KERN_DEBUG "\t ino %llu\n", 540 printk(KERN_DEBUG "\t ino %llu\n",
541 le64_to_cpu(orph->inos[i])); 541 (unsigned long long)le64_to_cpu(orph->inos[i]));
542 break; 542 break;
543 } 543 }
544 default: 544 default:
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 5c96f1fb7016..526c01ec8003 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -426,7 +426,7 @@ static int ubifs_readdir(struct file *file, void *dirent, filldir_t filldir)
426 426
427 while (1) { 427 while (1) {
428 dbg_gen("feed '%s', ino %llu, new f_pos %#x", 428 dbg_gen("feed '%s', ino %llu, new f_pos %#x",
429 dent->name, le64_to_cpu(dent->inum), 429 dent->name, (unsigned long long)le64_to_cpu(dent->inum),
430 key_hash_flash(c, &dent->key)); 430 key_hash_flash(c, &dent->key));
431 ubifs_assert(dent->ch.sqnum > ubifs_inode(dir)->creat_sqnum); 431 ubifs_assert(dent->ch.sqnum > ubifs_inode(dir)->creat_sqnum);
432 432
@@ -587,7 +587,6 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry)
587 if (err) { 587 if (err) {
588 if (err != -ENOSPC) 588 if (err != -ENOSPC)
589 return err; 589 return err;
590 err = 0;
591 budgeted = 0; 590 budgeted = 0;
592 } 591 }
593 592
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 4071d1cae29f..3d698e2022b1 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -793,7 +793,7 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode,
793 int err; 793 int err;
794 struct ubifs_budget_req req; 794 struct ubifs_budget_req req;
795 loff_t old_size = inode->i_size, new_size = attr->ia_size; 795 loff_t old_size = inode->i_size, new_size = attr->ia_size;
796 int offset = new_size & (UBIFS_BLOCK_SIZE - 1); 796 int offset = new_size & (UBIFS_BLOCK_SIZE - 1), budgeted = 1;
797 struct ubifs_inode *ui = ubifs_inode(inode); 797 struct ubifs_inode *ui = ubifs_inode(inode);
798 798
799 dbg_gen("ino %lu, size %lld -> %lld", inode->i_ino, old_size, new_size); 799 dbg_gen("ino %lu, size %lld -> %lld", inode->i_ino, old_size, new_size);
@@ -811,8 +811,15 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode,
811 /* A funny way to budget for truncation node */ 811 /* A funny way to budget for truncation node */
812 req.dirtied_ino_d = UBIFS_TRUN_NODE_SZ; 812 req.dirtied_ino_d = UBIFS_TRUN_NODE_SZ;
813 err = ubifs_budget_space(c, &req); 813 err = ubifs_budget_space(c, &req);
814 if (err) 814 if (err) {
815 return err; 815 /*
816 * Treat truncations to zero as deletion and always allow them,
817 * just like we do for '->unlink()'.
818 */
819 if (new_size || err != -ENOSPC)
820 return err;
821 budgeted = 0;
822 }
816 823
817 err = vmtruncate(inode, new_size); 824 err = vmtruncate(inode, new_size);
818 if (err) 825 if (err)
@@ -869,7 +876,12 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode,
869 err = ubifs_jnl_truncate(c, inode, old_size, new_size); 876 err = ubifs_jnl_truncate(c, inode, old_size, new_size);
870 mutex_unlock(&ui->ui_mutex); 877 mutex_unlock(&ui->ui_mutex);
871out_budg: 878out_budg:
872 ubifs_release_budget(c, &req); 879 if (budgeted)
880 ubifs_release_budget(c, &req);
881 else {
882 c->nospace = c->nospace_rp = 0;
883 smp_wmb();
884 }
873 return err; 885 return err;
874} 886}
875 887
diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c
index adee7b5ddeab..47814cde2407 100644
--- a/fs/ubifs/find.c
+++ b/fs/ubifs/find.c
@@ -211,14 +211,8 @@ static const struct ubifs_lprops *scan_for_dirty(struct ubifs_info *c,
211 * dirty index heap, and it falls-back to LPT scanning if the heaps are empty 211 * dirty index heap, and it falls-back to LPT scanning if the heaps are empty
212 * or do not have an LEB which satisfies the @min_space criteria. 212 * or do not have an LEB which satisfies the @min_space criteria.
213 * 213 *
214 * Note: 214 * Note, LEBs which have less than dead watermark of free + dirty space are
215 * o LEBs which have less than dead watermark of dirty space are never picked 215 * never picked by this function.
216 * by this function;
217 *
218 * Returns zero and the LEB properties of
219 * found dirty LEB in case of success, %-ENOSPC if no dirty LEB was found and a
220 * negative error code in case of other failures. The returned LEB is marked as
221 * "taken".
222 * 216 *
223 * The additional @pick_free argument controls if this function has to return a 217 * The additional @pick_free argument controls if this function has to return a
224 * free or freeable LEB if one is present. For example, GC must set it to %1, 218 * free or freeable LEB if one is present. For example, GC must set it to %1,
@@ -231,6 +225,10 @@ static const struct ubifs_lprops *scan_for_dirty(struct ubifs_info *c,
231 * 225 *
232 * In addition @pick_free is set to %2 by the recovery process in order to 226 * In addition @pick_free is set to %2 by the recovery process in order to
233 * recover gc_lnum in which case an index LEB must not be returned. 227 * recover gc_lnum in which case an index LEB must not be returned.
228 *
229 * This function returns zero and the LEB properties of found dirty LEB in case
230 * of success, %-ENOSPC if no dirty LEB was found and a negative error code in
231 * case of other failures. The returned LEB is marked as "taken".
234 */ 232 */
235int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, 233int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp,
236 int min_space, int pick_free) 234 int min_space, int pick_free)
@@ -245,7 +243,7 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp,
245 int lebs, rsvd_idx_lebs = 0; 243 int lebs, rsvd_idx_lebs = 0;
246 244
247 spin_lock(&c->space_lock); 245 spin_lock(&c->space_lock);
248 lebs = c->lst.empty_lebs; 246 lebs = c->lst.empty_lebs + c->idx_gc_cnt;
249 lebs += c->freeable_cnt - c->lst.taken_empty_lebs; 247 lebs += c->freeable_cnt - c->lst.taken_empty_lebs;
250 248
251 /* 249 /*
@@ -317,7 +315,7 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp,
317 lp = idx_lp; 315 lp = idx_lp;
318 316
319 if (lp) { 317 if (lp) {
320 ubifs_assert(lp->dirty >= c->dead_wm); 318 ubifs_assert(lp->free + lp->dirty >= c->dead_wm);
321 goto found; 319 goto found;
322 } 320 }
323 321
@@ -509,7 +507,6 @@ int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free,
509 rsvd_idx_lebs = 0; 507 rsvd_idx_lebs = 0;
510 lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - 508 lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt -
511 c->lst.taken_empty_lebs; 509 c->lst.taken_empty_lebs;
512 ubifs_assert(lebs + c->lst.idx_lebs >= c->min_idx_lebs);
513 if (rsvd_idx_lebs < lebs) 510 if (rsvd_idx_lebs < lebs)
514 /* 511 /*
515 * OK to allocate an empty LEB, but we still don't want to go 512 * OK to allocate an empty LEB, but we still don't want to go
diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c
index d0f3dac29081..02aba36fe3d4 100644
--- a/fs/ubifs/gc.c
+++ b/fs/ubifs/gc.c
@@ -334,15 +334,21 @@ int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp)
334 334
335 err = move_nodes(c, sleb); 335 err = move_nodes(c, sleb);
336 if (err) 336 if (err)
337 goto out; 337 goto out_inc_seq;
338 338
339 err = gc_sync_wbufs(c); 339 err = gc_sync_wbufs(c);
340 if (err) 340 if (err)
341 goto out; 341 goto out_inc_seq;
342 342
343 err = ubifs_change_one_lp(c, lnum, c->leb_size, 0, 0, 0, 0); 343 err = ubifs_change_one_lp(c, lnum, c->leb_size, 0, 0, 0, 0);
344 if (err) 344 if (err)
345 goto out; 345 goto out_inc_seq;
346
347 /* Allow for races with TNC */
348 c->gced_lnum = lnum;
349 smp_wmb();
350 c->gc_seq += 1;
351 smp_wmb();
346 352
347 if (c->gc_lnum == -1) { 353 if (c->gc_lnum == -1) {
348 c->gc_lnum = lnum; 354 c->gc_lnum = lnum;
@@ -363,6 +369,14 @@ int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp)
363out: 369out:
364 ubifs_scan_destroy(sleb); 370 ubifs_scan_destroy(sleb);
365 return err; 371 return err;
372
373out_inc_seq:
374 /* We may have moved at least some nodes so allow for races with TNC */
375 c->gced_lnum = lnum;
376 smp_wmb();
377 c->gc_seq += 1;
378 smp_wmb();
379 goto out;
366} 380}
367 381
368/** 382/**
diff --git a/fs/ubifs/misc.h b/fs/ubifs/misc.h
index 87dabf9fe742..4c12a9215d7f 100644
--- a/fs/ubifs/misc.h
+++ b/fs/ubifs/misc.h
@@ -284,38 +284,6 @@ static inline void *ubifs_idx_key(const struct ubifs_info *c,
284} 284}
285 285
286/** 286/**
287 * ubifs_reported_space - calculate reported free space.
288 * @c: the UBIFS file-system description object
289 * @free: amount of free space
290 *
291 * This function calculates amount of free space which will be reported to
292 * user-space. User-space applications tend to expect that if the file-system
293 * (e.g., via the 'statfs()' call) reports that it has N bytes available, they
294 * are able to write a file of size N. UBIFS attaches node headers to each data
295 * node and it has to write indexing nodes as well. This introduces additional
296 * overhead, and UBIFS has to report slightly less free space to meet the
297 * above expectation.
298 *
299 * This function assumes free space is made up of uncompressed data nodes and
300 * full index nodes (one per data node, doubled because we always allow enough
301 * space to write the index twice).
302 *
303 * Note, the calculation is pessimistic, which means that most of the time
304 * UBIFS reports less space than it actually has.
305 */
306static inline long long ubifs_reported_space(const struct ubifs_info *c,
307 uint64_t free)
308{
309 int divisor, factor;
310
311 divisor = UBIFS_MAX_DATA_NODE_SZ + (c->max_idx_node_sz * 3);
312 factor = UBIFS_MAX_DATA_NODE_SZ - UBIFS_DATA_NODE_SZ;
313 do_div(free, divisor);
314
315 return free * factor;
316}
317
318/**
319 * ubifs_current_time - round current time to time granularity. 287 * ubifs_current_time - round current time to time granularity.
320 * @inode: inode 288 * @inode: inode
321 */ 289 */
@@ -325,4 +293,21 @@ static inline struct timespec ubifs_current_time(struct inode *inode)
325 current_fs_time(inode->i_sb) : CURRENT_TIME_SEC; 293 current_fs_time(inode->i_sb) : CURRENT_TIME_SEC;
326} 294}
327 295
296/**
297 * ubifs_tnc_lookup - look up a file-system node.
298 * @c: UBIFS file-system description object
299 * @key: node key to lookup
300 * @node: the node is returned here
301 *
302 * This function looks up and reads the node with key @key. The caller has to make
303 * sure the @node buffer is large enough to fit the node. Returns zero in case
304 * of success, %-ENOENT if the node was not found, and a negative error code in
305 * case of failure.
306 */
307static inline int ubifs_tnc_lookup(struct ubifs_info *c,
308 const union ubifs_key *key, void *node)
309{
310 return ubifs_tnc_locate(c, key, node, NULL, NULL);
311}
312
328#endif /* __UBIFS_MISC_H__ */ 313#endif /* __UBIFS_MISC_H__ */
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index f71e6b8822c4..3f4902060c7a 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -370,8 +370,9 @@ static int ubifs_statfs(struct dentry *dentry, struct kstatfs *buf)
370{ 370{
371 struct ubifs_info *c = dentry->d_sb->s_fs_info; 371 struct ubifs_info *c = dentry->d_sb->s_fs_info;
372 unsigned long long free; 372 unsigned long long free;
373 __le32 *uuid = (__le32 *)c->uuid;
373 374
374 free = ubifs_budg_get_free_space(c); 375 free = ubifs_get_free_space(c);
375 dbg_gen("free space %lld bytes (%lld blocks)", 376 dbg_gen("free space %lld bytes (%lld blocks)",
376 free, free >> UBIFS_BLOCK_SHIFT); 377 free, free >> UBIFS_BLOCK_SHIFT);
377 378
@@ -386,7 +387,8 @@ static int ubifs_statfs(struct dentry *dentry, struct kstatfs *buf)
386 buf->f_files = 0; 387 buf->f_files = 0;
387 buf->f_ffree = 0; 388 buf->f_ffree = 0;
388 buf->f_namelen = UBIFS_MAX_NLEN; 389 buf->f_namelen = UBIFS_MAX_NLEN;
389 390 buf->f_fsid.val[0] = le32_to_cpu(uuid[0]) ^ le32_to_cpu(uuid[2]);
391 buf->f_fsid.val[1] = le32_to_cpu(uuid[1]) ^ le32_to_cpu(uuid[3]);
390 return 0; 392 return 0;
391} 393}
392 394
@@ -530,6 +532,12 @@ static int init_constants_early(struct ubifs_info *c)
530 c->dead_wm = ALIGN(MIN_WRITE_SZ, c->min_io_size); 532 c->dead_wm = ALIGN(MIN_WRITE_SZ, c->min_io_size);
531 c->dark_wm = ALIGN(UBIFS_MAX_NODE_SZ, c->min_io_size); 533 c->dark_wm = ALIGN(UBIFS_MAX_NODE_SZ, c->min_io_size);
532 534
535 /*
536 * Calculate how many bytes would be wasted at the end of LEB if it was
537 * fully filled with data nodes of maximum size. This is used in
538 * calculations when reporting free space.
539 */
540 c->leb_overhead = c->leb_size % UBIFS_MAX_DATA_NODE_SZ;
533 return 0; 541 return 0;
534} 542}
535 543
@@ -647,13 +655,11 @@ static int init_constants_late(struct ubifs_info *c)
647 * internally because it does not make much sense for UBIFS, but it is 655 * internally because it does not make much sense for UBIFS, but it is
648 * necessary to report something for the 'statfs()' call. 656 * necessary to report something for the 'statfs()' call.
649 * 657 *
650 * Subtract the LEB reserved for GC and the LEB which is reserved for 658 * Subtract the LEB reserved for GC, the LEB which is reserved for
651 * deletions. 659 * deletions, and assume only one journal head is available.
652 *
653 * Review 'ubifs_calc_available()' if changing this calculation.
654 */ 660 */
655 tmp64 = c->main_lebs - 2; 661 tmp64 = c->main_lebs - 2 - c->jhead_cnt + 1;
656 tmp64 *= (uint64_t)c->leb_size - c->dark_wm; 662 tmp64 *= (uint64_t)c->leb_size - c->leb_overhead;
657 tmp64 = ubifs_reported_space(c, tmp64); 663 tmp64 = ubifs_reported_space(c, tmp64);
658 c->block_cnt = tmp64 >> UBIFS_BLOCK_SHIFT; 664 c->block_cnt = tmp64 >> UBIFS_BLOCK_SHIFT;
659 665
@@ -1018,14 +1024,13 @@ static int mount_ubifs(struct ubifs_info *c)
1018 goto out_dereg; 1024 goto out_dereg;
1019 } 1025 }
1020 1026
1027 sprintf(c->bgt_name, BGT_NAME_PATTERN, c->vi.ubi_num, c->vi.vol_id);
1021 if (!mounted_read_only) { 1028 if (!mounted_read_only) {
1022 err = alloc_wbufs(c); 1029 err = alloc_wbufs(c);
1023 if (err) 1030 if (err)
1024 goto out_cbuf; 1031 goto out_cbuf;
1025 1032
1026 /* Create background thread */ 1033 /* Create background thread */
1027 sprintf(c->bgt_name, BGT_NAME_PATTERN, c->vi.ubi_num,
1028 c->vi.vol_id);
1029 c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name); 1034 c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name);
1030 if (!c->bgt) 1035 if (!c->bgt)
1031 c->bgt = ERR_PTR(-EINVAL); 1036 c->bgt = ERR_PTR(-EINVAL);
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
index e909f4a96443..7634c5970887 100644
--- a/fs/ubifs/tnc.c
+++ b/fs/ubifs/tnc.c
@@ -506,7 +506,7 @@ static int fallible_read_node(struct ubifs_info *c, const union ubifs_key *key,
506 if (keys_cmp(c, key, &node_key) != 0) 506 if (keys_cmp(c, key, &node_key) != 0)
507 ret = 0; 507 ret = 0;
508 } 508 }
509 if (ret == 0) 509 if (ret == 0 && c->replaying)
510 dbg_mnt("dangling branch LEB %d:%d len %d, key %s", 510 dbg_mnt("dangling branch LEB %d:%d len %d, key %s",
511 zbr->lnum, zbr->offs, zbr->len, DBGKEY(key)); 511 zbr->lnum, zbr->offs, zbr->len, DBGKEY(key));
512 return ret; 512 return ret;
@@ -1382,50 +1382,39 @@ static int lookup_level0_dirty(struct ubifs_info *c, const union ubifs_key *key,
1382} 1382}
1383 1383
1384/** 1384/**
1385 * ubifs_tnc_lookup - look up a file-system node. 1385 * maybe_leb_gced - determine if a LEB may have been garbage collected.
1386 * @c: UBIFS file-system description object 1386 * @c: UBIFS file-system description object
1387 * @key: node key to lookup 1387 * @lnum: LEB number
1388 * @node: the node is returned here 1388 * @gc_seq1: garbage collection sequence number
1389 * 1389 *
1390 * This function look up and reads node with key @key. The caller has to make 1390 * This function determines if @lnum may have been garbage collected since
1391 * sure the @node buffer is large enough to fit the node. Returns zero in case 1391 * sequence number @gc_seq1. If it may have been then %1 is returned, otherwise
1392 * of success, %-ENOENT if the node was not found, and a negative error code in 1392 * %0 is returned.
1393 * case of failure.
1394 */ 1393 */
1395int ubifs_tnc_lookup(struct ubifs_info *c, const union ubifs_key *key, 1394static int maybe_leb_gced(struct ubifs_info *c, int lnum, int gc_seq1)
1396 void *node)
1397{ 1395{
1398 int found, n, err; 1396 int gc_seq2, gced_lnum;
1399 struct ubifs_znode *znode;
1400 struct ubifs_zbranch zbr, *zt;
1401 1397
1402 mutex_lock(&c->tnc_mutex); 1398 gced_lnum = c->gced_lnum;
1403 found = ubifs_lookup_level0(c, key, &znode, &n); 1399 smp_rmb();
1404 if (!found) { 1400 gc_seq2 = c->gc_seq;
1405 err = -ENOENT; 1401 /* Same seq means no GC */
1406 goto out; 1402 if (gc_seq1 == gc_seq2)
1407 } else if (found < 0) { 1403 return 0;
1408 err = found; 1404 /* Different by more than 1 means we don't know */
1409 goto out; 1405 if (gc_seq1 + 1 != gc_seq2)
1410 } 1406 return 1;
1411 zt = &znode->zbranch[n]; 1407 /*
1412 if (is_hash_key(c, key)) { 1408 * We have seen the sequence number has increased by 1. Now we need to
1413 /* 1409 * be sure we read the right LEB number, so read it again.
1414 * In this case the leaf node cache gets used, so we pass the 1410 */
1415 * address of the zbranch and keep the mutex locked 1411 smp_rmb();
1416 */ 1412 if (gced_lnum != c->gced_lnum)
1417 err = tnc_read_node_nm(c, zt, node); 1413 return 1;
1418 goto out; 1414 /* Finally we can check lnum */
1419 } 1415 if (gced_lnum == lnum)
1420 zbr = znode->zbranch[n]; 1416 return 1;
1421 mutex_unlock(&c->tnc_mutex); 1417 return 0;
1422
1423 err = ubifs_tnc_read_node(c, &zbr, node);
1424 return err;
1425
1426out:
1427 mutex_unlock(&c->tnc_mutex);
1428 return err;
1429} 1418}
1430 1419
1431/** 1420/**
@@ -1436,16 +1425,19 @@ out:
1436 * @lnum: LEB number is returned here 1425 * @lnum: LEB number is returned here
1437 * @offs: offset is returned here 1426 * @offs: offset is returned here
1438 * 1427 *
1439 * This function is the same as 'ubifs_tnc_lookup()' but it returns the node 1428 * This function look up and reads node with key @key. The caller has to make
1440 * location also. See 'ubifs_tnc_lookup()'. 1429 * sure the @node buffer is large enough to fit the node. Returns zero in case
1430 * of success, %-ENOENT if the node was not found, and a negative error code in
1431 * case of failure. The node location can be returned in @lnum and @offs.
1441 */ 1432 */
1442int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, 1433int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key,
1443 void *node, int *lnum, int *offs) 1434 void *node, int *lnum, int *offs)
1444{ 1435{
1445 int found, n, err; 1436 int found, n, err, safely = 0, gc_seq1;
1446 struct ubifs_znode *znode; 1437 struct ubifs_znode *znode;
1447 struct ubifs_zbranch zbr, *zt; 1438 struct ubifs_zbranch zbr, *zt;
1448 1439
1440again:
1449 mutex_lock(&c->tnc_mutex); 1441 mutex_lock(&c->tnc_mutex);
1450 found = ubifs_lookup_level0(c, key, &znode, &n); 1442 found = ubifs_lookup_level0(c, key, &znode, &n);
1451 if (!found) { 1443 if (!found) {
@@ -1456,24 +1448,43 @@ int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key,
1456 goto out; 1448 goto out;
1457 } 1449 }
1458 zt = &znode->zbranch[n]; 1450 zt = &znode->zbranch[n];
1451 if (lnum) {
1452 *lnum = zt->lnum;
1453 *offs = zt->offs;
1454 }
1459 if (is_hash_key(c, key)) { 1455 if (is_hash_key(c, key)) {
1460 /* 1456 /*
1461 * In this case the leaf node cache gets used, so we pass the 1457 * In this case the leaf node cache gets used, so we pass the
1462 * address of the zbranch and keep the mutex locked 1458 * address of the zbranch and keep the mutex locked
1463 */ 1459 */
1464 *lnum = zt->lnum;
1465 *offs = zt->offs;
1466 err = tnc_read_node_nm(c, zt, node); 1460 err = tnc_read_node_nm(c, zt, node);
1467 goto out; 1461 goto out;
1468 } 1462 }
1463 if (safely) {
1464 err = ubifs_tnc_read_node(c, zt, node);
1465 goto out;
1466 }
1467 /* Drop the TNC mutex prematurely and race with garbage collection */
1469 zbr = znode->zbranch[n]; 1468 zbr = znode->zbranch[n];
1469 gc_seq1 = c->gc_seq;
1470 mutex_unlock(&c->tnc_mutex); 1470 mutex_unlock(&c->tnc_mutex);
1471 1471
1472 *lnum = zbr.lnum; 1472 if (ubifs_get_wbuf(c, zbr.lnum)) {
1473 *offs = zbr.offs; 1473 /* We do not GC journal heads */
1474 err = ubifs_tnc_read_node(c, &zbr, node);
1475 return err;
1476 }
1474 1477
1475 err = ubifs_tnc_read_node(c, &zbr, node); 1478 err = fallible_read_node(c, key, &zbr, node);
1476 return err; 1479 if (err <= 0 || maybe_leb_gced(c, zbr.lnum, gc_seq1)) {
1480 /*
1481 * The node may have been GC'ed out from under us so try again
1482 * while keeping the TNC mutex locked.
1483 */
1484 safely = 1;
1485 goto again;
1486 }
1487 return 0;
1477 1488
1478out: 1489out:
1479 mutex_unlock(&c->tnc_mutex); 1490 mutex_unlock(&c->tnc_mutex);
@@ -1498,7 +1509,6 @@ static int do_lookup_nm(struct ubifs_info *c, const union ubifs_key *key,
1498{ 1509{
1499 int found, n, err; 1510 int found, n, err;
1500 struct ubifs_znode *znode; 1511 struct ubifs_znode *znode;
1501 struct ubifs_zbranch zbr;
1502 1512
1503 dbg_tnc("name '%.*s' key %s", nm->len, nm->name, DBGKEY(key)); 1513 dbg_tnc("name '%.*s' key %s", nm->len, nm->name, DBGKEY(key));
1504 mutex_lock(&c->tnc_mutex); 1514 mutex_lock(&c->tnc_mutex);
@@ -1522,11 +1532,7 @@ static int do_lookup_nm(struct ubifs_info *c, const union ubifs_key *key,
1522 goto out_unlock; 1532 goto out_unlock;
1523 } 1533 }
1524 1534
1525 zbr = znode->zbranch[n]; 1535 err = tnc_read_node_nm(c, &znode->zbranch[n], node);
1526 mutex_unlock(&c->tnc_mutex);
1527
1528 err = tnc_read_node_nm(c, &zbr, node);
1529 return err;
1530 1536
1531out_unlock: 1537out_unlock:
1532 mutex_unlock(&c->tnc_mutex); 1538 mutex_unlock(&c->tnc_mutex);
diff --git a/fs/ubifs/ubifs-media.h b/fs/ubifs/ubifs-media.h
index bd2121f3426e..a9ecbd9af20d 100644
--- a/fs/ubifs/ubifs-media.h
+++ b/fs/ubifs/ubifs-media.h
@@ -87,7 +87,7 @@
87#define UBIFS_SK_LEN 8 87#define UBIFS_SK_LEN 8
88 88
89/* Minimum index tree fanout */ 89/* Minimum index tree fanout */
90#define UBIFS_MIN_FANOUT 2 90#define UBIFS_MIN_FANOUT 3
91 91
92/* Maximum number of levels in UBIFS indexing B-tree */ 92/* Maximum number of levels in UBIFS indexing B-tree */
93#define UBIFS_MAX_LEVELS 512 93#define UBIFS_MAX_LEVELS 512
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index d7f706f7a302..17c620b93eec 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -995,6 +995,9 @@ struct ubifs_mount_opts {
995 * @max_idx_node_sz: maximum indexing node aligned on 8-bytes boundary 995 * @max_idx_node_sz: maximum indexing node aligned on 8-bytes boundary
996 * @max_inode_sz: maximum possible inode size in bytes 996 * @max_inode_sz: maximum possible inode size in bytes
997 * @max_znode_sz: size of znode in bytes 997 * @max_znode_sz: size of znode in bytes
998 *
999 * @leb_overhead: how many bytes are wasted in an LEB when it is filled with
1000 * data nodes of maximum size - used in free space reporting
998 * @dead_wm: LEB dead space watermark 1001 * @dead_wm: LEB dead space watermark
999 * @dark_wm: LEB dark space watermark 1002 * @dark_wm: LEB dark space watermark
1000 * @block_cnt: count of 4KiB blocks on the FS 1003 * @block_cnt: count of 4KiB blocks on the FS
@@ -1028,6 +1031,8 @@ struct ubifs_mount_opts {
1028 * @sbuf: a buffer of LEB size used by GC and replay for scanning 1031 * @sbuf: a buffer of LEB size used by GC and replay for scanning
1029 * @idx_gc: list of index LEBs that have been garbage collected 1032 * @idx_gc: list of index LEBs that have been garbage collected
1030 * @idx_gc_cnt: number of elements on the idx_gc list 1033 * @idx_gc_cnt: number of elements on the idx_gc list
1034 * @gc_seq: incremented for every non-index LEB garbage collected
1035 * @gced_lnum: last non-index LEB that was garbage collected
1031 * 1036 *
1032 * @infos_list: links all 'ubifs_info' objects 1037 * @infos_list: links all 'ubifs_info' objects
1033 * @umount_mutex: serializes shrinker and un-mount 1038 * @umount_mutex: serializes shrinker and un-mount
@@ -1224,6 +1229,8 @@ struct ubifs_info {
1224 int max_idx_node_sz; 1229 int max_idx_node_sz;
1225 long long max_inode_sz; 1230 long long max_inode_sz;
1226 int max_znode_sz; 1231 int max_znode_sz;
1232
1233 int leb_overhead;
1227 int dead_wm; 1234 int dead_wm;
1228 int dark_wm; 1235 int dark_wm;
1229 int block_cnt; 1236 int block_cnt;
@@ -1257,6 +1264,8 @@ struct ubifs_info {
1257 void *sbuf; 1264 void *sbuf;
1258 struct list_head idx_gc; 1265 struct list_head idx_gc;
1259 int idx_gc_cnt; 1266 int idx_gc_cnt;
1267 volatile int gc_seq;
1268 volatile int gced_lnum;
1260 1269
1261 struct list_head infos_list; 1270 struct list_head infos_list;
1262 struct mutex umount_mutex; 1271 struct mutex umount_mutex;
@@ -1434,9 +1443,10 @@ void ubifs_release_ino_dirty(struct ubifs_info *c, struct inode *inode,
1434 struct ubifs_budget_req *req); 1443 struct ubifs_budget_req *req);
1435void ubifs_cancel_ino_op(struct ubifs_info *c, struct inode *inode, 1444void ubifs_cancel_ino_op(struct ubifs_info *c, struct inode *inode,
1436 struct ubifs_budget_req *req); 1445 struct ubifs_budget_req *req);
1437long long ubifs_budg_get_free_space(struct ubifs_info *c); 1446long long ubifs_get_free_space(struct ubifs_info *c);
1438int ubifs_calc_min_idx_lebs(struct ubifs_info *c); 1447int ubifs_calc_min_idx_lebs(struct ubifs_info *c);
1439void ubifs_convert_page_budget(struct ubifs_info *c); 1448void ubifs_convert_page_budget(struct ubifs_info *c);
1449long long ubifs_reported_space(const struct ubifs_info *c, uint64_t free);
1440long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs); 1450long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs);
1441 1451
1442/* find.c */ 1452/* find.c */
@@ -1451,8 +1461,6 @@ int ubifs_save_dirty_idx_lnums(struct ubifs_info *c);
1451/* tnc.c */ 1461/* tnc.c */
1452int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key, 1462int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key,
1453 struct ubifs_znode **zn, int *n); 1463 struct ubifs_znode **zn, int *n);
1454int ubifs_tnc_lookup(struct ubifs_info *c, const union ubifs_key *key,
1455 void *node);
1456int ubifs_tnc_lookup_nm(struct ubifs_info *c, const union ubifs_key *key, 1464int ubifs_tnc_lookup_nm(struct ubifs_info *c, const union ubifs_key *key,
1457 void *node, const struct qstr *nm); 1465 void *node, const struct qstr *nm);
1458int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, 1466int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key,