diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2008-09-09 14:52:12 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-09-09 14:52:12 -0400 |
commit | b975dee3811ae0c58bd0e19cbd041cac8dd37ec5 (patch) | |
tree | d65f5381024a00982e47fd6144d219bb6b34ff78 /fs | |
parent | deac93df26b20cf8438339b5935b5f5643bc30c9 (diff) | |
parent | a5cb562d6977d9d7989c346b7b153cef31ec0228 (diff) |
Merge branch 'linux-next' of git://git.infradead.org/~dedekind/ubifs-2.6
* 'linux-next' of git://git.infradead.org/~dedekind/ubifs-2.6:
UBIFS: make minimum fanout 3
UBIFS: fix division by zero
UBIFS: amend f_fsid
UBIFS: fill f_fsid
UBIFS: improve statfs reporting even more
UBIFS: introduce LEB overhead
UBIFS: add forgotten gc_idx_lebs component
UBIFS: fix assertion
UBIFS: improve statfs reporting
UBIFS: remove incorrect index space check
UBIFS: push empty flash hack down
UBIFS: do not update min_idx_lebs in stafs
UBIFS: allow for racing between GC and TNC
UBIFS: always read hashed-key nodes under TNC mutex
UBIFS: fix zero-length truncations
Diffstat (limited to 'fs')
-rw-r--r-- | fs/ubifs/budget.c | 114 | ||||
-rw-r--r-- | fs/ubifs/dir.c | 1 | ||||
-rw-r--r-- | fs/ubifs/file.c | 20 | ||||
-rw-r--r-- | fs/ubifs/find.c | 18 | ||||
-rw-r--r-- | fs/ubifs/gc.c | 6 | ||||
-rw-r--r-- | fs/ubifs/misc.h | 49 | ||||
-rw-r--r-- | fs/ubifs/super.c | 22 | ||||
-rw-r--r-- | fs/ubifs/tnc.c | 116 | ||||
-rw-r--r-- | fs/ubifs/ubifs-media.h | 2 | ||||
-rw-r--r-- | fs/ubifs/ubifs.h | 14 |
10 files changed, 221 insertions, 141 deletions
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c index 154098157473..73db464cd08b 100644 --- a/fs/ubifs/budget.c +++ b/fs/ubifs/budget.c | |||
@@ -302,18 +302,6 @@ long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs) | |||
302 | int subtract_lebs; | 302 | int subtract_lebs; |
303 | long long available; | 303 | long long available; |
304 | 304 | ||
305 | /* | ||
306 | * Force the amount available to the total size reported if the used | ||
307 | * space is zero. | ||
308 | */ | ||
309 | if (c->lst.total_used <= UBIFS_INO_NODE_SZ && | ||
310 | c->budg_data_growth + c->budg_dd_growth == 0) { | ||
311 | /* Do the same calculation as for c->block_cnt */ | ||
312 | available = c->main_lebs - 2; | ||
313 | available *= c->leb_size - c->dark_wm; | ||
314 | return available; | ||
315 | } | ||
316 | |||
317 | available = c->main_bytes - c->lst.total_used; | 305 | available = c->main_bytes - c->lst.total_used; |
318 | 306 | ||
319 | /* | 307 | /* |
@@ -714,34 +702,106 @@ void ubifs_release_dirty_inode_budget(struct ubifs_info *c, | |||
714 | } | 702 | } |
715 | 703 | ||
716 | /** | 704 | /** |
717 | * ubifs_budg_get_free_space - return amount of free space. | 705 | * ubifs_reported_space - calculate reported free space. |
706 | * @c: the UBIFS file-system description object | ||
707 | * @free: amount of free space | ||
708 | * | ||
709 | * This function calculates amount of free space which will be reported to | ||
710 | * user-space. User-space application tend to expect that if the file-system | ||
711 | * (e.g., via the 'statfs()' call) reports that it has N bytes available, they | ||
712 | * are able to write a file of size N. UBIFS attaches node headers to each data | ||
713 | * node and it has to write indexing nodes as well. This introduces additional | ||
714 | * overhead, and UBIFS has to report slightly less free space to meet the | ||
715 | * above expectation. | ||
716 | * | ||
717 | * This function assumes free space is made up of uncompressed data nodes and | ||
718 | * full index nodes (one per data node, tripled because we always allow enough | ||
719 | * space to write the index thrice). | ||
720 | * | ||
721 | * Note, the calculation is pessimistic, which means that most of the time | ||
722 | * UBIFS reports less space than it actually has. | ||
723 | */ | ||
724 | long long ubifs_reported_space(const struct ubifs_info *c, uint64_t free) | ||
725 | { | ||
726 | int divisor, factor, f; | ||
727 | |||
728 | /* | ||
729 | * Reported space size is @free * X, where X is UBIFS block size | ||
730 | * divided by UBIFS block size + all overhead one data block | ||
731 | * introduces. The overhead is the node header + indexing overhead. | ||
732 | * | ||
733 | * Indexing overhead calculations are based on the following formula: | ||
734 | * I = N/(f - 1) + 1, where I - number of indexing nodes, N - number | ||
735 | * of data nodes, f - fanout. Because the effective UBIFS fanout is half | ||
736 | * of the maximum fanout, we assume that each data node | ||
737 | * introduces 3 * @c->max_idx_node_sz / (@c->fanout/2 - 1) bytes. | ||
738 | * Note, the multiplier 3 is because UBIFS reserves three times as much | ||
739 | * space for the index. | ||
740 | */ | ||
741 | f = c->fanout > 3 ? c->fanout >> 1 : 2; | ||
742 | factor = UBIFS_BLOCK_SIZE; | ||
743 | divisor = UBIFS_MAX_DATA_NODE_SZ; | ||
744 | divisor += (c->max_idx_node_sz * 3) / (f - 1); | ||
745 | free *= factor; | ||
746 | do_div(free, divisor); | ||
747 | return free; | ||
748 | } | ||
749 | |||
750 | /** | ||
751 | * ubifs_get_free_space - return amount of free space. | ||
718 | * @c: UBIFS file-system description object | 752 | * @c: UBIFS file-system description object |
719 | * | 753 | * |
720 | * This function returns amount of free space on the file-system. | 754 | * This function calculates amount of free space to report to user-space. |
755 | * | ||
756 | * Because UBIFS may introduce substantial overhead (the index, node headers, | ||
757 | * alignment, wastage at the end of eraseblocks, etc), it cannot report real | ||
758 | * amount of free flash space it has (well, because not all dirty space is | ||
759 | * reclaimable, UBIFS does not actually know the real amount). If UBIFS did so, | ||
760 | * it would break user expectations about what free space is. Users seem | ||
761 | * accustomed to assuming that if the file-system reports N bytes of free space, | ||
762 | * they would be able to fit a file of N bytes to the FS. This almost works for | ||
763 | * traditional file-systems, because they have way less overhead than UBIFS. | ||
764 | * So, to keep users happy, UBIFS tries to take the overhead into account. | ||
721 | */ | 765 | */ |
722 | long long ubifs_budg_get_free_space(struct ubifs_info *c) | 766 | long long ubifs_get_free_space(struct ubifs_info *c) |
723 | { | 767 | { |
724 | int min_idx_lebs, rsvd_idx_lebs; | 768 | int min_idx_lebs, rsvd_idx_lebs, lebs; |
725 | long long available, outstanding, free; | 769 | long long available, outstanding, free; |
726 | 770 | ||
727 | /* Do exactly the same calculations as in 'do_budget_space()' */ | ||
728 | spin_lock(&c->space_lock); | 771 | spin_lock(&c->space_lock); |
729 | min_idx_lebs = ubifs_calc_min_idx_lebs(c); | 772 | min_idx_lebs = ubifs_calc_min_idx_lebs(c); |
773 | outstanding = c->budg_data_growth + c->budg_dd_growth; | ||
730 | 774 | ||
731 | if (min_idx_lebs > c->lst.idx_lebs) | 775 | /* |
732 | rsvd_idx_lebs = min_idx_lebs - c->lst.idx_lebs; | 776 | * Force the amount available to the total size reported if the used |
733 | else | 777 | * space is zero. |
734 | rsvd_idx_lebs = 0; | 778 | */ |
735 | 779 | if (c->lst.total_used <= UBIFS_INO_NODE_SZ && !outstanding) { | |
736 | if (rsvd_idx_lebs > c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt | ||
737 | - c->lst.taken_empty_lebs) { | ||
738 | spin_unlock(&c->space_lock); | 780 | spin_unlock(&c->space_lock); |
739 | return 0; | 781 | return (long long)c->block_cnt << UBIFS_BLOCK_SHIFT; |
740 | } | 782 | } |
741 | 783 | ||
742 | available = ubifs_calc_available(c, min_idx_lebs); | 784 | available = ubifs_calc_available(c, min_idx_lebs); |
743 | outstanding = c->budg_data_growth + c->budg_dd_growth; | 785 | |
744 | c->min_idx_lebs = min_idx_lebs; | 786 | /* |
787 | * When reporting free space to user-space, UBIFS guarantees that it is | ||
788 | * possible to write a file of free space size. This means that for | ||
789 | * empty LEBs we may use more precise calculations than | ||
790 | * 'ubifs_calc_available()' is using. Namely, we know that in empty | ||
791 | * LEBs we would waste only @c->leb_overhead bytes, not @c->dark_wm. | ||
792 | * Thus, amend the available space. | ||
793 | * | ||
794 | * Note, the calculations below are similar to what we have in | ||
795 | * 'do_budget_space()', so refer there for comments. | ||
796 | */ | ||
797 | if (min_idx_lebs > c->lst.idx_lebs) | ||
798 | rsvd_idx_lebs = min_idx_lebs - c->lst.idx_lebs; | ||
799 | else | ||
800 | rsvd_idx_lebs = 0; | ||
801 | lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - | ||
802 | c->lst.taken_empty_lebs; | ||
803 | lebs -= rsvd_idx_lebs; | ||
804 | available += lebs * (c->dark_wm - c->leb_overhead); | ||
745 | spin_unlock(&c->space_lock); | 805 | spin_unlock(&c->space_lock); |
746 | 806 | ||
747 | if (available > outstanding) | 807 | if (available > outstanding) |
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 5c96f1fb7016..2b267c9a1806 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c | |||
@@ -587,7 +587,6 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry) | |||
587 | if (err) { | 587 | if (err) { |
588 | if (err != -ENOSPC) | 588 | if (err != -ENOSPC) |
589 | return err; | 589 | return err; |
590 | err = 0; | ||
591 | budgeted = 0; | 590 | budgeted = 0; |
592 | } | 591 | } |
593 | 592 | ||
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 4071d1cae29f..3d698e2022b1 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c | |||
@@ -793,7 +793,7 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode, | |||
793 | int err; | 793 | int err; |
794 | struct ubifs_budget_req req; | 794 | struct ubifs_budget_req req; |
795 | loff_t old_size = inode->i_size, new_size = attr->ia_size; | 795 | loff_t old_size = inode->i_size, new_size = attr->ia_size; |
796 | int offset = new_size & (UBIFS_BLOCK_SIZE - 1); | 796 | int offset = new_size & (UBIFS_BLOCK_SIZE - 1), budgeted = 1; |
797 | struct ubifs_inode *ui = ubifs_inode(inode); | 797 | struct ubifs_inode *ui = ubifs_inode(inode); |
798 | 798 | ||
799 | dbg_gen("ino %lu, size %lld -> %lld", inode->i_ino, old_size, new_size); | 799 | dbg_gen("ino %lu, size %lld -> %lld", inode->i_ino, old_size, new_size); |
@@ -811,8 +811,15 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode, | |||
811 | /* A funny way to budget for truncation node */ | 811 | /* A funny way to budget for truncation node */ |
812 | req.dirtied_ino_d = UBIFS_TRUN_NODE_SZ; | 812 | req.dirtied_ino_d = UBIFS_TRUN_NODE_SZ; |
813 | err = ubifs_budget_space(c, &req); | 813 | err = ubifs_budget_space(c, &req); |
814 | if (err) | 814 | if (err) { |
815 | return err; | 815 | /* |
816 | * Treat truncations to zero as deletion and always allow them, | ||
817 | * just like we do for '->unlink()'. | ||
818 | */ | ||
819 | if (new_size || err != -ENOSPC) | ||
820 | return err; | ||
821 | budgeted = 0; | ||
822 | } | ||
816 | 823 | ||
817 | err = vmtruncate(inode, new_size); | 824 | err = vmtruncate(inode, new_size); |
818 | if (err) | 825 | if (err) |
@@ -869,7 +876,12 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode, | |||
869 | err = ubifs_jnl_truncate(c, inode, old_size, new_size); | 876 | err = ubifs_jnl_truncate(c, inode, old_size, new_size); |
870 | mutex_unlock(&ui->ui_mutex); | 877 | mutex_unlock(&ui->ui_mutex); |
871 | out_budg: | 878 | out_budg: |
872 | ubifs_release_budget(c, &req); | 879 | if (budgeted) |
880 | ubifs_release_budget(c, &req); | ||
881 | else { | ||
882 | c->nospace = c->nospace_rp = 0; | ||
883 | smp_wmb(); | ||
884 | } | ||
873 | return err; | 885 | return err; |
874 | } | 886 | } |
875 | 887 | ||
diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c index adee7b5ddeab..e045c8b55423 100644 --- a/fs/ubifs/find.c +++ b/fs/ubifs/find.c | |||
@@ -211,14 +211,8 @@ static const struct ubifs_lprops *scan_for_dirty(struct ubifs_info *c, | |||
211 | * dirty index heap, and it falls-back to LPT scanning if the heaps are empty | 211 | * dirty index heap, and it falls-back to LPT scanning if the heaps are empty |
212 | * or do not have an LEB which satisfies the @min_space criteria. | 212 | * or do not have an LEB which satisfies the @min_space criteria. |
213 | * | 213 | * |
214 | * Note: | 214 | * Note, LEBs which have less than dead watermark of free + dirty space are |
215 | * o LEBs which have less than dead watermark of dirty space are never picked | 215 | * never picked by this function. |
216 | * by this function; | ||
217 | * | ||
218 | * Returns zero and the LEB properties of | ||
219 | * found dirty LEB in case of success, %-ENOSPC if no dirty LEB was found and a | ||
220 | * negative error code in case of other failures. The returned LEB is marked as | ||
221 | * "taken". | ||
222 | * | 216 | * |
223 | * The additional @pick_free argument controls if this function has to return a | 217 | * The additional @pick_free argument controls if this function has to return a |
224 | * free or freeable LEB if one is present. For example, GC must to set it to %1, | 218 | * free or freeable LEB if one is present. For example, GC must to set it to %1, |
@@ -231,6 +225,10 @@ static const struct ubifs_lprops *scan_for_dirty(struct ubifs_info *c, | |||
231 | * | 225 | * |
232 | * In addition @pick_free is set to %2 by the recovery process in order to | 226 | * In addition @pick_free is set to %2 by the recovery process in order to |
233 | * recover gc_lnum in which case an index LEB must not be returned. | 227 | * recover gc_lnum in which case an index LEB must not be returned. |
228 | * | ||
229 | * This function returns zero and the LEB properties of found dirty LEB in case | ||
230 | * of success, %-ENOSPC if no dirty LEB was found and a negative error code in | ||
231 | * case of other failures. The returned LEB is marked as "taken". | ||
234 | */ | 232 | */ |
235 | int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, | 233 | int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, |
236 | int min_space, int pick_free) | 234 | int min_space, int pick_free) |
@@ -245,7 +243,7 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, | |||
245 | int lebs, rsvd_idx_lebs = 0; | 243 | int lebs, rsvd_idx_lebs = 0; |
246 | 244 | ||
247 | spin_lock(&c->space_lock); | 245 | spin_lock(&c->space_lock); |
248 | lebs = c->lst.empty_lebs; | 246 | lebs = c->lst.empty_lebs + c->idx_gc_cnt; |
249 | lebs += c->freeable_cnt - c->lst.taken_empty_lebs; | 247 | lebs += c->freeable_cnt - c->lst.taken_empty_lebs; |
250 | 248 | ||
251 | /* | 249 | /* |
@@ -317,7 +315,7 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, | |||
317 | lp = idx_lp; | 315 | lp = idx_lp; |
318 | 316 | ||
319 | if (lp) { | 317 | if (lp) { |
320 | ubifs_assert(lp->dirty >= c->dead_wm); | 318 | ubifs_assert(lp->free + lp->dirty >= c->dead_wm); |
321 | goto found; | 319 | goto found; |
322 | } | 320 | } |
323 | 321 | ||
diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c index d0f3dac29081..13f1019c859f 100644 --- a/fs/ubifs/gc.c +++ b/fs/ubifs/gc.c | |||
@@ -344,6 +344,12 @@ int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp) | |||
344 | if (err) | 344 | if (err) |
345 | goto out; | 345 | goto out; |
346 | 346 | ||
347 | /* Allow for races with TNC */ | ||
348 | c->gced_lnum = lnum; | ||
349 | smp_wmb(); | ||
350 | c->gc_seq += 1; | ||
351 | smp_wmb(); | ||
352 | |||
347 | if (c->gc_lnum == -1) { | 353 | if (c->gc_lnum == -1) { |
348 | c->gc_lnum = lnum; | 354 | c->gc_lnum = lnum; |
349 | err = LEB_RETAINED; | 355 | err = LEB_RETAINED; |
diff --git a/fs/ubifs/misc.h b/fs/ubifs/misc.h index 87dabf9fe742..4c12a9215d7f 100644 --- a/fs/ubifs/misc.h +++ b/fs/ubifs/misc.h | |||
@@ -284,38 +284,6 @@ static inline void *ubifs_idx_key(const struct ubifs_info *c, | |||
284 | } | 284 | } |
285 | 285 | ||
286 | /** | 286 | /** |
287 | * ubifs_reported_space - calculate reported free space. | ||
288 | * @c: the UBIFS file-system description object | ||
289 | * @free: amount of free space | ||
290 | * | ||
291 | * This function calculates amount of free space which will be reported to | ||
292 | * user-space. User-space application tend to expect that if the file-system | ||
293 | * (e.g., via the 'statfs()' call) reports that it has N bytes available, they | ||
294 | * are able to write a file of size N. UBIFS attaches node headers to each data | ||
295 | * node and it has to write indexind nodes as well. This introduces additional | ||
296 | * overhead, and UBIFS it has to report sligtly less free space to meet the | ||
297 | * above expectetion. | ||
298 | * | ||
299 | * This function assumes free space is made up of uncompressed data nodes and | ||
300 | * full index nodes (one per data node, doubled because we always allow enough | ||
301 | * space to write the index twice). | ||
302 | * | ||
303 | * Note, the calculation is pessimistic, which means that most of the time | ||
304 | * UBIFS reports less space than it actually has. | ||
305 | */ | ||
306 | static inline long long ubifs_reported_space(const struct ubifs_info *c, | ||
307 | uint64_t free) | ||
308 | { | ||
309 | int divisor, factor; | ||
310 | |||
311 | divisor = UBIFS_MAX_DATA_NODE_SZ + (c->max_idx_node_sz * 3); | ||
312 | factor = UBIFS_MAX_DATA_NODE_SZ - UBIFS_DATA_NODE_SZ; | ||
313 | do_div(free, divisor); | ||
314 | |||
315 | return free * factor; | ||
316 | } | ||
317 | |||
318 | /** | ||
319 | * ubifs_current_time - round current time to time granularity. | 287 | * ubifs_current_time - round current time to time granularity. |
320 | * @inode: inode | 288 | * @inode: inode |
321 | */ | 289 | */ |
@@ -325,4 +293,21 @@ static inline struct timespec ubifs_current_time(struct inode *inode) | |||
325 | current_fs_time(inode->i_sb) : CURRENT_TIME_SEC; | 293 | current_fs_time(inode->i_sb) : CURRENT_TIME_SEC; |
326 | } | 294 | } |
327 | 295 | ||
296 | /** | ||
297 | * ubifs_tnc_lookup - look up a file-system node. | ||
298 | * @c: UBIFS file-system description object | ||
299 | * @key: node key to lookup | ||
300 | * @node: the node is returned here | ||
301 | * | ||
302 | * This function looks up and reads the node with key @key. The caller has to make | ||
303 | * sure the @node buffer is large enough to fit the node. Returns zero in case | ||
304 | * of success, %-ENOENT if the node was not found, and a negative error code in | ||
305 | * case of failure. | ||
306 | */ | ||
307 | static inline int ubifs_tnc_lookup(struct ubifs_info *c, | ||
308 | const union ubifs_key *key, void *node) | ||
309 | { | ||
310 | return ubifs_tnc_locate(c, key, node, NULL, NULL); | ||
311 | } | ||
312 | |||
328 | #endif /* __UBIFS_MISC_H__ */ | 313 | #endif /* __UBIFS_MISC_H__ */ |
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index f71e6b8822c4..7562464ac83f 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c | |||
@@ -370,8 +370,9 @@ static int ubifs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
370 | { | 370 | { |
371 | struct ubifs_info *c = dentry->d_sb->s_fs_info; | 371 | struct ubifs_info *c = dentry->d_sb->s_fs_info; |
372 | unsigned long long free; | 372 | unsigned long long free; |
373 | __le32 *uuid = (__le32 *)c->uuid; | ||
373 | 374 | ||
374 | free = ubifs_budg_get_free_space(c); | 375 | free = ubifs_get_free_space(c); |
375 | dbg_gen("free space %lld bytes (%lld blocks)", | 376 | dbg_gen("free space %lld bytes (%lld blocks)", |
376 | free, free >> UBIFS_BLOCK_SHIFT); | 377 | free, free >> UBIFS_BLOCK_SHIFT); |
377 | 378 | ||
@@ -386,7 +387,8 @@ static int ubifs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
386 | buf->f_files = 0; | 387 | buf->f_files = 0; |
387 | buf->f_ffree = 0; | 388 | buf->f_ffree = 0; |
388 | buf->f_namelen = UBIFS_MAX_NLEN; | 389 | buf->f_namelen = UBIFS_MAX_NLEN; |
389 | 390 | buf->f_fsid.val[0] = le32_to_cpu(uuid[0]) ^ le32_to_cpu(uuid[2]); | |
391 | buf->f_fsid.val[1] = le32_to_cpu(uuid[1]) ^ le32_to_cpu(uuid[3]); | ||
390 | return 0; | 392 | return 0; |
391 | } | 393 | } |
392 | 394 | ||
@@ -530,6 +532,12 @@ static int init_constants_early(struct ubifs_info *c) | |||
530 | c->dead_wm = ALIGN(MIN_WRITE_SZ, c->min_io_size); | 532 | c->dead_wm = ALIGN(MIN_WRITE_SZ, c->min_io_size); |
531 | c->dark_wm = ALIGN(UBIFS_MAX_NODE_SZ, c->min_io_size); | 533 | c->dark_wm = ALIGN(UBIFS_MAX_NODE_SZ, c->min_io_size); |
532 | 534 | ||
535 | /* | ||
536 | * Calculate how many bytes would be wasted at the end of LEB if it was | ||
537 | * fully filled with data nodes of maximum size. This is used in | ||
538 | * calculations when reporting free space. | ||
539 | */ | ||
540 | c->leb_overhead = c->leb_size % UBIFS_MAX_DATA_NODE_SZ; | ||
533 | return 0; | 541 | return 0; |
534 | } | 542 | } |
535 | 543 | ||
@@ -647,13 +655,11 @@ static int init_constants_late(struct ubifs_info *c) | |||
647 | * internally because it does not make much sense for UBIFS, but it is | 655 | * internally because it does not make much sense for UBIFS, but it is |
648 | * necessary to report something for the 'statfs()' call. | 656 | * necessary to report something for the 'statfs()' call. |
649 | * | 657 | * |
650 | * Subtract the LEB reserved for GC and the LEB which is reserved for | 658 | * Subtract the LEB reserved for GC, the LEB which is reserved for |
651 | * deletions. | 659 | * deletions, and assume only one journal head is available. |
652 | * | ||
653 | * Review 'ubifs_calc_available()' if changing this calculation. | ||
654 | */ | 660 | */ |
655 | tmp64 = c->main_lebs - 2; | 661 | tmp64 = c->main_lebs - 2 - c->jhead_cnt + 1; |
656 | tmp64 *= (uint64_t)c->leb_size - c->dark_wm; | 662 | tmp64 *= (uint64_t)c->leb_size - c->leb_overhead; |
657 | tmp64 = ubifs_reported_space(c, tmp64); | 663 | tmp64 = ubifs_reported_space(c, tmp64); |
658 | c->block_cnt = tmp64 >> UBIFS_BLOCK_SHIFT; | 664 | c->block_cnt = tmp64 >> UBIFS_BLOCK_SHIFT; |
659 | 665 | ||
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c index e909f4a96443..7da209ab9378 100644 --- a/fs/ubifs/tnc.c +++ b/fs/ubifs/tnc.c | |||
@@ -506,7 +506,7 @@ static int fallible_read_node(struct ubifs_info *c, const union ubifs_key *key, | |||
506 | if (keys_cmp(c, key, &node_key) != 0) | 506 | if (keys_cmp(c, key, &node_key) != 0) |
507 | ret = 0; | 507 | ret = 0; |
508 | } | 508 | } |
509 | if (ret == 0) | 509 | if (ret == 0 && c->replaying) |
510 | dbg_mnt("dangling branch LEB %d:%d len %d, key %s", | 510 | dbg_mnt("dangling branch LEB %d:%d len %d, key %s", |
511 | zbr->lnum, zbr->offs, zbr->len, DBGKEY(key)); | 511 | zbr->lnum, zbr->offs, zbr->len, DBGKEY(key)); |
512 | return ret; | 512 | return ret; |
@@ -1382,50 +1382,39 @@ static int lookup_level0_dirty(struct ubifs_info *c, const union ubifs_key *key, | |||
1382 | } | 1382 | } |
1383 | 1383 | ||
1384 | /** | 1384 | /** |
1385 | * ubifs_tnc_lookup - look up a file-system node. | 1385 | * maybe_leb_gced - determine if a LEB may have been garbage collected. |
1386 | * @c: UBIFS file-system description object | 1386 | * @c: UBIFS file-system description object |
1387 | * @key: node key to lookup | 1387 | * @lnum: LEB number |
1388 | * @node: the node is returned here | 1388 | * @gc_seq1: garbage collection sequence number |
1389 | * | 1389 | * |
1390 | * This function look up and reads node with key @key. The caller has to make | 1390 | * This function determines if @lnum may have been garbage collected since |
1391 | * sure the @node buffer is large enough to fit the node. Returns zero in case | 1391 | * sequence number @gc_seq1. If it may have been then %1 is returned, otherwise |
1392 | * of success, %-ENOENT if the node was not found, and a negative error code in | 1392 | * %0 is returned. |
1393 | * case of failure. | ||
1394 | */ | 1393 | */ |
1395 | int ubifs_tnc_lookup(struct ubifs_info *c, const union ubifs_key *key, | 1394 | static int maybe_leb_gced(struct ubifs_info *c, int lnum, int gc_seq1) |
1396 | void *node) | ||
1397 | { | 1395 | { |
1398 | int found, n, err; | 1396 | int gc_seq2, gced_lnum; |
1399 | struct ubifs_znode *znode; | ||
1400 | struct ubifs_zbranch zbr, *zt; | ||
1401 | 1397 | ||
1402 | mutex_lock(&c->tnc_mutex); | 1398 | gced_lnum = c->gced_lnum; |
1403 | found = ubifs_lookup_level0(c, key, &znode, &n); | 1399 | smp_rmb(); |
1404 | if (!found) { | 1400 | gc_seq2 = c->gc_seq; |
1405 | err = -ENOENT; | 1401 | /* Same seq means no GC */ |
1406 | goto out; | 1402 | if (gc_seq1 == gc_seq2) |
1407 | } else if (found < 0) { | 1403 | return 0; |
1408 | err = found; | 1404 | /* Different by more than 1 means we don't know */ |
1409 | goto out; | 1405 | if (gc_seq1 + 1 != gc_seq2) |
1410 | } | 1406 | return 1; |
1411 | zt = &znode->zbranch[n]; | 1407 | /* |
1412 | if (is_hash_key(c, key)) { | 1408 | * We have seen the sequence number has increased by 1. Now we need to |
1413 | /* | 1409 | * be sure we read the right LEB number, so read it again. |
1414 | * In this case the leaf node cache gets used, so we pass the | 1410 | */ |
1415 | * address of the zbranch and keep the mutex locked | 1411 | smp_rmb(); |
1416 | */ | 1412 | if (gced_lnum != c->gced_lnum) |
1417 | err = tnc_read_node_nm(c, zt, node); | 1413 | return 1; |
1418 | goto out; | 1414 | /* Finally we can check lnum */ |
1419 | } | 1415 | if (gced_lnum == lnum) |
1420 | zbr = znode->zbranch[n]; | 1416 | return 1; |
1421 | mutex_unlock(&c->tnc_mutex); | 1417 | return 0; |
1422 | |||
1423 | err = ubifs_tnc_read_node(c, &zbr, node); | ||
1424 | return err; | ||
1425 | |||
1426 | out: | ||
1427 | mutex_unlock(&c->tnc_mutex); | ||
1428 | return err; | ||
1429 | } | 1418 | } |
1430 | 1419 | ||
1431 | /** | 1420 | /** |
@@ -1436,16 +1425,19 @@ out: | |||
1436 | * @lnum: LEB number is returned here | 1425 | * @lnum: LEB number is returned here |
1437 | * @offs: offset is returned here | 1426 | * @offs: offset is returned here |
1438 | * | 1427 | * |
1439 | * This function is the same as 'ubifs_tnc_lookup()' but it returns the node | 1428 | * This function looks up and reads the node with key @key. The caller has to make |
1440 | * location also. See 'ubifs_tnc_lookup()'. | 1429 | * sure the @node buffer is large enough to fit the node. Returns zero in case |
1430 | * of success, %-ENOENT if the node was not found, and a negative error code in | ||
1431 | * case of failure. The node location can be returned in @lnum and @offs. | ||
1441 | */ | 1432 | */ |
1442 | int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, | 1433 | int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, |
1443 | void *node, int *lnum, int *offs) | 1434 | void *node, int *lnum, int *offs) |
1444 | { | 1435 | { |
1445 | int found, n, err; | 1436 | int found, n, err, safely = 0, gc_seq1; |
1446 | struct ubifs_znode *znode; | 1437 | struct ubifs_znode *znode; |
1447 | struct ubifs_zbranch zbr, *zt; | 1438 | struct ubifs_zbranch zbr, *zt; |
1448 | 1439 | ||
1440 | again: | ||
1449 | mutex_lock(&c->tnc_mutex); | 1441 | mutex_lock(&c->tnc_mutex); |
1450 | found = ubifs_lookup_level0(c, key, &znode, &n); | 1442 | found = ubifs_lookup_level0(c, key, &znode, &n); |
1451 | if (!found) { | 1443 | if (!found) { |
@@ -1456,24 +1448,43 @@ int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, | |||
1456 | goto out; | 1448 | goto out; |
1457 | } | 1449 | } |
1458 | zt = &znode->zbranch[n]; | 1450 | zt = &znode->zbranch[n]; |
1451 | if (lnum) { | ||
1452 | *lnum = zt->lnum; | ||
1453 | *offs = zt->offs; | ||
1454 | } | ||
1459 | if (is_hash_key(c, key)) { | 1455 | if (is_hash_key(c, key)) { |
1460 | /* | 1456 | /* |
1461 | * In this case the leaf node cache gets used, so we pass the | 1457 | * In this case the leaf node cache gets used, so we pass the |
1462 | * address of the zbranch and keep the mutex locked | 1458 | * address of the zbranch and keep the mutex locked |
1463 | */ | 1459 | */ |
1464 | *lnum = zt->lnum; | ||
1465 | *offs = zt->offs; | ||
1466 | err = tnc_read_node_nm(c, zt, node); | 1460 | err = tnc_read_node_nm(c, zt, node); |
1467 | goto out; | 1461 | goto out; |
1468 | } | 1462 | } |
1463 | if (safely) { | ||
1464 | err = ubifs_tnc_read_node(c, zt, node); | ||
1465 | goto out; | ||
1466 | } | ||
1467 | /* Drop the TNC mutex prematurely and race with garbage collection */ | ||
1469 | zbr = znode->zbranch[n]; | 1468 | zbr = znode->zbranch[n]; |
1469 | gc_seq1 = c->gc_seq; | ||
1470 | mutex_unlock(&c->tnc_mutex); | 1470 | mutex_unlock(&c->tnc_mutex); |
1471 | 1471 | ||
1472 | *lnum = zbr.lnum; | 1472 | if (ubifs_get_wbuf(c, zbr.lnum)) { |
1473 | *offs = zbr.offs; | 1473 | /* We do not GC journal heads */ |
1474 | err = ubifs_tnc_read_node(c, &zbr, node); | ||
1475 | return err; | ||
1476 | } | ||
1474 | 1477 | ||
1475 | err = ubifs_tnc_read_node(c, &zbr, node); | 1478 | err = fallible_read_node(c, key, &zbr, node); |
1476 | return err; | 1479 | if (maybe_leb_gced(c, zbr.lnum, gc_seq1)) { |
1480 | /* | ||
1481 | * The node may have been GC'ed out from under us so try again | ||
1482 | * while keeping the TNC mutex locked. | ||
1483 | */ | ||
1484 | safely = 1; | ||
1485 | goto again; | ||
1486 | } | ||
1487 | return 0; | ||
1477 | 1488 | ||
1478 | out: | 1489 | out: |
1479 | mutex_unlock(&c->tnc_mutex); | 1490 | mutex_unlock(&c->tnc_mutex); |
@@ -1498,7 +1509,6 @@ static int do_lookup_nm(struct ubifs_info *c, const union ubifs_key *key, | |||
1498 | { | 1509 | { |
1499 | int found, n, err; | 1510 | int found, n, err; |
1500 | struct ubifs_znode *znode; | 1511 | struct ubifs_znode *znode; |
1501 | struct ubifs_zbranch zbr; | ||
1502 | 1512 | ||
1503 | dbg_tnc("name '%.*s' key %s", nm->len, nm->name, DBGKEY(key)); | 1513 | dbg_tnc("name '%.*s' key %s", nm->len, nm->name, DBGKEY(key)); |
1504 | mutex_lock(&c->tnc_mutex); | 1514 | mutex_lock(&c->tnc_mutex); |
@@ -1522,11 +1532,7 @@ static int do_lookup_nm(struct ubifs_info *c, const union ubifs_key *key, | |||
1522 | goto out_unlock; | 1532 | goto out_unlock; |
1523 | } | 1533 | } |
1524 | 1534 | ||
1525 | zbr = znode->zbranch[n]; | 1535 | err = tnc_read_node_nm(c, &znode->zbranch[n], node); |
1526 | mutex_unlock(&c->tnc_mutex); | ||
1527 | |||
1528 | err = tnc_read_node_nm(c, &zbr, node); | ||
1529 | return err; | ||
1530 | 1536 | ||
1531 | out_unlock: | 1537 | out_unlock: |
1532 | mutex_unlock(&c->tnc_mutex); | 1538 | mutex_unlock(&c->tnc_mutex); |
diff --git a/fs/ubifs/ubifs-media.h b/fs/ubifs/ubifs-media.h index bd2121f3426e..a9ecbd9af20d 100644 --- a/fs/ubifs/ubifs-media.h +++ b/fs/ubifs/ubifs-media.h | |||
@@ -87,7 +87,7 @@ | |||
87 | #define UBIFS_SK_LEN 8 | 87 | #define UBIFS_SK_LEN 8 |
88 | 88 | ||
89 | /* Minimum index tree fanout */ | 89 | /* Minimum index tree fanout */ |
90 | #define UBIFS_MIN_FANOUT 2 | 90 | #define UBIFS_MIN_FANOUT 3 |
91 | 91 | ||
92 | /* Maximum number of levels in UBIFS indexing B-tree */ | 92 | /* Maximum number of levels in UBIFS indexing B-tree */ |
93 | #define UBIFS_MAX_LEVELS 512 | 93 | #define UBIFS_MAX_LEVELS 512 |
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index d7f706f7a302..17c620b93eec 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h | |||
@@ -995,6 +995,9 @@ struct ubifs_mount_opts { | |||
995 | * @max_idx_node_sz: maximum indexing node aligned on 8-bytes boundary | 995 | * @max_idx_node_sz: maximum indexing node aligned on 8-bytes boundary |
996 | * @max_inode_sz: maximum possible inode size in bytes | 996 | * @max_inode_sz: maximum possible inode size in bytes |
997 | * @max_znode_sz: size of znode in bytes | 997 | * @max_znode_sz: size of znode in bytes |
998 | * | ||
999 | * @leb_overhead: how many bytes are wasted in an LEB when it is filled with | ||
1000 | * data nodes of maximum size - used in free space reporting | ||
998 | * @dead_wm: LEB dead space watermark | 1001 | * @dead_wm: LEB dead space watermark |
999 | * @dark_wm: LEB dark space watermark | 1002 | * @dark_wm: LEB dark space watermark |
1000 | * @block_cnt: count of 4KiB blocks on the FS | 1003 | * @block_cnt: count of 4KiB blocks on the FS |
@@ -1028,6 +1031,8 @@ struct ubifs_mount_opts { | |||
1028 | * @sbuf: a buffer of LEB size used by GC and replay for scanning | 1031 | * @sbuf: a buffer of LEB size used by GC and replay for scanning |
1029 | * @idx_gc: list of index LEBs that have been garbage collected | 1032 | * @idx_gc: list of index LEBs that have been garbage collected |
1030 | * @idx_gc_cnt: number of elements on the idx_gc list | 1033 | * @idx_gc_cnt: number of elements on the idx_gc list |
1034 | * @gc_seq: incremented for every non-index LEB garbage collected | ||
1035 | * @gced_lnum: last non-index LEB that was garbage collected | ||
1031 | * | 1036 | * |
1032 | * @infos_list: links all 'ubifs_info' objects | 1037 | * @infos_list: links all 'ubifs_info' objects |
1033 | * @umount_mutex: serializes shrinker and un-mount | 1038 | * @umount_mutex: serializes shrinker and un-mount |
@@ -1224,6 +1229,8 @@ struct ubifs_info { | |||
1224 | int max_idx_node_sz; | 1229 | int max_idx_node_sz; |
1225 | long long max_inode_sz; | 1230 | long long max_inode_sz; |
1226 | int max_znode_sz; | 1231 | int max_znode_sz; |
1232 | |||
1233 | int leb_overhead; | ||
1227 | int dead_wm; | 1234 | int dead_wm; |
1228 | int dark_wm; | 1235 | int dark_wm; |
1229 | int block_cnt; | 1236 | int block_cnt; |
@@ -1257,6 +1264,8 @@ struct ubifs_info { | |||
1257 | void *sbuf; | 1264 | void *sbuf; |
1258 | struct list_head idx_gc; | 1265 | struct list_head idx_gc; |
1259 | int idx_gc_cnt; | 1266 | int idx_gc_cnt; |
1267 | volatile int gc_seq; | ||
1268 | volatile int gced_lnum; | ||
1260 | 1269 | ||
1261 | struct list_head infos_list; | 1270 | struct list_head infos_list; |
1262 | struct mutex umount_mutex; | 1271 | struct mutex umount_mutex; |
@@ -1434,9 +1443,10 @@ void ubifs_release_ino_dirty(struct ubifs_info *c, struct inode *inode, | |||
1434 | struct ubifs_budget_req *req); | 1443 | struct ubifs_budget_req *req); |
1435 | void ubifs_cancel_ino_op(struct ubifs_info *c, struct inode *inode, | 1444 | void ubifs_cancel_ino_op(struct ubifs_info *c, struct inode *inode, |
1436 | struct ubifs_budget_req *req); | 1445 | struct ubifs_budget_req *req); |
1437 | long long ubifs_budg_get_free_space(struct ubifs_info *c); | 1446 | long long ubifs_get_free_space(struct ubifs_info *c); |
1438 | int ubifs_calc_min_idx_lebs(struct ubifs_info *c); | 1447 | int ubifs_calc_min_idx_lebs(struct ubifs_info *c); |
1439 | void ubifs_convert_page_budget(struct ubifs_info *c); | 1448 | void ubifs_convert_page_budget(struct ubifs_info *c); |
1449 | long long ubifs_reported_space(const struct ubifs_info *c, uint64_t free); | ||
1440 | long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs); | 1450 | long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs); |
1441 | 1451 | ||
1442 | /* find.c */ | 1452 | /* find.c */ |
@@ -1451,8 +1461,6 @@ int ubifs_save_dirty_idx_lnums(struct ubifs_info *c); | |||
1451 | /* tnc.c */ | 1461 | /* tnc.c */ |
1452 | int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key, | 1462 | int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key, |
1453 | struct ubifs_znode **zn, int *n); | 1463 | struct ubifs_znode **zn, int *n); |
1454 | int ubifs_tnc_lookup(struct ubifs_info *c, const union ubifs_key *key, | ||
1455 | void *node); | ||
1456 | int ubifs_tnc_lookup_nm(struct ubifs_info *c, const union ubifs_key *key, | 1464 | int ubifs_tnc_lookup_nm(struct ubifs_info *c, const union ubifs_key *key, |
1457 | void *node, const struct qstr *nm); | 1465 | void *node, const struct qstr *nm); |
1458 | int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, | 1466 | int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, |