diff options
Diffstat (limited to 'fs/ubifs/budget.c')
-rw-r--r-- | fs/ubifs/budget.c | 212 |
1 files changed, 71 insertions, 141 deletions
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c index 4a18f084cc42..175f9c590b77 100644 --- a/fs/ubifs/budget.c +++ b/fs/ubifs/budget.c | |||
@@ -32,18 +32,15 @@ | |||
32 | 32 | ||
33 | #include "ubifs.h" | 33 | #include "ubifs.h" |
34 | #include <linux/writeback.h> | 34 | #include <linux/writeback.h> |
35 | #include <asm/div64.h> | 35 | #include <linux/math64.h> |
36 | 36 | ||
37 | /* | 37 | /* |
38 | * When pessimistic budget calculations say that there is no enough space, | 38 | * When pessimistic budget calculations say that there is no enough space, |
39 | * UBIFS starts writing back dirty inodes and pages, doing garbage collection, | 39 | * UBIFS starts writing back dirty inodes and pages, doing garbage collection, |
40 | * or committing. The below constants define maximum number of times UBIFS | 40 | * or committing. The below constant defines maximum number of times UBIFS |
41 | * repeats the operations. | 41 | * repeats the operations. |
42 | */ | 42 | */ |
43 | #define MAX_SHRINK_RETRIES 8 | 43 | #define MAX_MKSPC_RETRIES 3 |
44 | #define MAX_GC_RETRIES 4 | ||
45 | #define MAX_CMT_RETRIES 2 | ||
46 | #define MAX_NOSPC_RETRIES 1 | ||
47 | 44 | ||
48 | /* | 45 | /* |
49 | * The below constant defines amount of dirty pages which should be written | 46 | * The below constant defines amount of dirty pages which should be written |
@@ -52,30 +49,6 @@ | |||
52 | #define NR_TO_WRITE 16 | 49 | #define NR_TO_WRITE 16 |
53 | 50 | ||
54 | /** | 51 | /** |
55 | * struct retries_info - information about re-tries while making free space. | ||
56 | * @prev_liability: previous liability | ||
57 | * @shrink_cnt: how many times the liability was shrinked | ||
58 | * @shrink_retries: count of liability shrink re-tries (increased when | ||
59 | * liability does not shrink) | ||
60 | * @try_gc: GC should be tried first | ||
61 | * @gc_retries: how many times GC was run | ||
62 | * @cmt_retries: how many times commit has been done | ||
63 | * @nospc_retries: how many times GC returned %-ENOSPC | ||
64 | * | ||
65 | * Since we consider budgeting to be the fast-path, and this structure has to | ||
66 | * be allocated on stack and zeroed out, we make it smaller using bit-fields. | ||
67 | */ | ||
68 | struct retries_info { | ||
69 | long long prev_liability; | ||
70 | unsigned int shrink_cnt; | ||
71 | unsigned int shrink_retries:5; | ||
72 | unsigned int try_gc:1; | ||
73 | unsigned int gc_retries:4; | ||
74 | unsigned int cmt_retries:3; | ||
75 | unsigned int nospc_retries:1; | ||
76 | }; | ||
77 | |||
78 | /** | ||
79 | * shrink_liability - write-back some dirty pages/inodes. | 52 | * shrink_liability - write-back some dirty pages/inodes. |
80 | * @c: UBIFS file-system description object | 53 | * @c: UBIFS file-system description object |
81 | * @nr_to_write: how many dirty pages to write-back | 54 | * @nr_to_write: how many dirty pages to write-back |
@@ -147,13 +120,29 @@ static int run_gc(struct ubifs_info *c) | |||
147 | } | 120 | } |
148 | 121 | ||
149 | /** | 122 | /** |
123 | * get_liability - calculate current liability. | ||
124 | * @c: UBIFS file-system description object | ||
125 | * | ||
126 | * This function calculates and returns current UBIFS liability, i.e. the | ||
127 | * amount of bytes UBIFS has "promised" to write to the media. | ||
128 | */ | ||
129 | static long long get_liability(struct ubifs_info *c) | ||
130 | { | ||
131 | long long liab; | ||
132 | |||
133 | spin_lock(&c->space_lock); | ||
134 | liab = c->budg_idx_growth + c->budg_data_growth + c->budg_dd_growth; | ||
135 | spin_unlock(&c->space_lock); | ||
136 | return liab; | ||
137 | } | ||
138 | |||
139 | /** | ||
150 | * make_free_space - make more free space on the file-system. | 140 | * make_free_space - make more free space on the file-system. |
151 | * @c: UBIFS file-system description object | 141 | * @c: UBIFS file-system description object |
152 | * @ri: information about previous invocations of this function | ||
153 | * | 142 | * |
154 | * This function is called when an operation cannot be budgeted because there | 143 | * This function is called when an operation cannot be budgeted because there |
155 | * is supposedly no free space. But in most cases there is some free space: | 144 | * is supposedly no free space. But in most cases there is some free space: |
156 | * o budgeting is pessimistic, so it always budgets more then it is actually | 145 | * o budgeting is pessimistic, so it always budgets more than it is actually |
157 | * needed, so shrinking the liability is one way to make free space - the | 146 | * needed, so shrinking the liability is one way to make free space - the |
158 | * cached data will take less space then it was budgeted for; | 147 | * cached data will take less space then it was budgeted for; |
159 | * o GC may turn some dark space into free space (budgeting treats dark space | 148 | * o GC may turn some dark space into free space (budgeting treats dark space |
@@ -165,87 +154,42 @@ static int run_gc(struct ubifs_info *c) | |||
165 | * Returns %-ENOSPC if it couldn't do more free space, and other negative error | 154 | * Returns %-ENOSPC if it couldn't do more free space, and other negative error |
166 | * codes on failures. | 155 | * codes on failures. |
167 | */ | 156 | */ |
168 | static int make_free_space(struct ubifs_info *c, struct retries_info *ri) | 157 | static int make_free_space(struct ubifs_info *c) |
169 | { | 158 | { |
170 | int err; | 159 | int err, retries = 0; |
171 | 160 | long long liab1, liab2; | |
172 | /* | ||
173 | * If we have some dirty pages and inodes (liability), try to write | ||
174 | * them back unless this was tried too many times without effect | ||
175 | * already. | ||
176 | */ | ||
177 | if (ri->shrink_retries < MAX_SHRINK_RETRIES && !ri->try_gc) { | ||
178 | long long liability; | ||
179 | |||
180 | spin_lock(&c->space_lock); | ||
181 | liability = c->budg_idx_growth + c->budg_data_growth + | ||
182 | c->budg_dd_growth; | ||
183 | spin_unlock(&c->space_lock); | ||
184 | 161 | ||
185 | if (ri->prev_liability >= liability) { | 162 | do { |
186 | /* Liability does not shrink, next time try GC then */ | 163 | liab1 = get_liability(c); |
187 | ri->shrink_retries += 1; | 164 | /* |
188 | if (ri->gc_retries < MAX_GC_RETRIES) | 165 | * We probably have some dirty pages or inodes (liability), try |
189 | ri->try_gc = 1; | 166 | * to write them back. |
190 | dbg_budg("liability did not shrink: retries %d of %d", | 167 | */ |
191 | ri->shrink_retries, MAX_SHRINK_RETRIES); | 168 | dbg_budg("liability %lld, run write-back", liab1); |
192 | } | 169 | shrink_liability(c, NR_TO_WRITE); |
193 | 170 | ||
194 | dbg_budg("force write-back (count %d)", ri->shrink_cnt); | 171 | liab2 = get_liability(c); |
195 | shrink_liability(c, NR_TO_WRITE + ri->shrink_cnt); | 172 | if (liab2 < liab1) |
173 | return -EAGAIN; | ||
196 | 174 | ||
197 | ri->prev_liability = liability; | 175 | dbg_budg("new liability %lld (not shrinked)", liab2); |
198 | ri->shrink_cnt += 1; | ||
199 | return -EAGAIN; | ||
200 | } | ||
201 | 176 | ||
202 | /* | 177 | /* Liability did not shrink again, try GC */ |
203 | * Try to run garbage collector unless it was already tried too many | 178 | dbg_budg("Run GC"); |
204 | * times. | ||
205 | */ | ||
206 | if (ri->gc_retries < MAX_GC_RETRIES) { | ||
207 | ri->gc_retries += 1; | ||
208 | dbg_budg("run GC, retries %d of %d", | ||
209 | ri->gc_retries, MAX_GC_RETRIES); | ||
210 | |||
211 | ri->try_gc = 0; | ||
212 | err = run_gc(c); | 179 | err = run_gc(c); |
213 | if (!err) | 180 | if (!err) |
214 | return -EAGAIN; | 181 | return -EAGAIN; |
215 | 182 | ||
216 | if (err == -EAGAIN) { | 183 | if (err != -EAGAIN && err != -ENOSPC) |
217 | dbg_budg("GC asked to commit"); | 184 | /* Some real error happened */ |
218 | err = ubifs_run_commit(c); | ||
219 | if (err) | ||
220 | return err; | ||
221 | return -EAGAIN; | ||
222 | } | ||
223 | |||
224 | if (err != -ENOSPC) | ||
225 | return err; | ||
226 | |||
227 | /* | ||
228 | * GC could not make any progress. If this is the first time, | ||
229 | * then it makes sense to try to commit, because it might make | ||
230 | * some dirty space. | ||
231 | */ | ||
232 | dbg_budg("GC returned -ENOSPC, retries %d", | ||
233 | ri->nospc_retries); | ||
234 | if (ri->nospc_retries >= MAX_NOSPC_RETRIES) | ||
235 | return err; | 185 | return err; |
236 | ri->nospc_retries += 1; | ||
237 | } | ||
238 | 186 | ||
239 | /* Neither GC nor write-back helped, try to commit */ | 187 | dbg_budg("Run commit (retries %d)", retries); |
240 | if (ri->cmt_retries < MAX_CMT_RETRIES) { | ||
241 | ri->cmt_retries += 1; | ||
242 | dbg_budg("run commit, retries %d of %d", | ||
243 | ri->cmt_retries, MAX_CMT_RETRIES); | ||
244 | err = ubifs_run_commit(c); | 188 | err = ubifs_run_commit(c); |
245 | if (err) | 189 | if (err) |
246 | return err; | 190 | return err; |
247 | return -EAGAIN; | 191 | } while (retries++ < MAX_MKSPC_RETRIES); |
248 | } | 192 | |
249 | return -ENOSPC; | 193 | return -ENOSPC; |
250 | } | 194 | } |
251 | 195 | ||
@@ -258,8 +202,8 @@ static int make_free_space(struct ubifs_info *c, struct retries_info *ri) | |||
258 | */ | 202 | */ |
259 | int ubifs_calc_min_idx_lebs(struct ubifs_info *c) | 203 | int ubifs_calc_min_idx_lebs(struct ubifs_info *c) |
260 | { | 204 | { |
261 | int ret; | 205 | int idx_lebs, eff_leb_size = c->leb_size - c->max_idx_node_sz; |
262 | uint64_t idx_size; | 206 | long long idx_size; |
263 | 207 | ||
264 | idx_size = c->old_idx_sz + c->budg_idx_growth + c->budg_uncommitted_idx; | 208 | idx_size = c->old_idx_sz + c->budg_idx_growth + c->budg_uncommitted_idx; |
265 | 209 | ||
@@ -271,23 +215,16 @@ int ubifs_calc_min_idx_lebs(struct ubifs_info *c) | |||
271 | * pair, nor similarly the two variables for the new index size, so we | 215 | * pair, nor similarly the two variables for the new index size, so we |
272 | * have to do this costly 64-bit division on fast-path. | 216 | * have to do this costly 64-bit division on fast-path. |
273 | */ | 217 | */ |
274 | if (do_div(idx_size, c->leb_size - c->max_idx_node_sz)) | 218 | idx_size += eff_leb_size - 1; |
275 | ret = idx_size + 1; | 219 | idx_lebs = div_u64(idx_size, eff_leb_size); |
276 | else | ||
277 | ret = idx_size; | ||
278 | /* | 220 | /* |
279 | * The index head is not available for the in-the-gaps method, so add an | 221 | * The index head is not available for the in-the-gaps method, so add an |
280 | * extra LEB to compensate. | 222 | * extra LEB to compensate. |
281 | */ | 223 | */ |
282 | ret += 1; | 224 | idx_lebs += 1; |
283 | /* | 225 | if (idx_lebs < MIN_INDEX_LEBS) |
284 | * At present the index needs at least 2 LEBs: one for the index head | 226 | idx_lebs = MIN_INDEX_LEBS; |
285 | * and one for in-the-gaps method (which currently does not cater for | 227 | return idx_lebs; |
286 | * the index head and so excludes it from consideration). | ||
287 | */ | ||
288 | if (ret < 2) | ||
289 | ret = 2; | ||
290 | return ret; | ||
291 | } | 228 | } |
292 | 229 | ||
293 | /** | 230 | /** |
@@ -530,8 +467,7 @@ static int calc_dd_growth(const struct ubifs_info *c, | |||
530 | int ubifs_budget_space(struct ubifs_info *c, struct ubifs_budget_req *req) | 467 | int ubifs_budget_space(struct ubifs_info *c, struct ubifs_budget_req *req) |
531 | { | 468 | { |
532 | int uninitialized_var(cmt_retries), uninitialized_var(wb_retries); | 469 | int uninitialized_var(cmt_retries), uninitialized_var(wb_retries); |
533 | int err, idx_growth, data_growth, dd_growth; | 470 | int err, idx_growth, data_growth, dd_growth, retried = 0; |
534 | struct retries_info ri; | ||
535 | 471 | ||
536 | ubifs_assert(req->new_page <= 1); | 472 | ubifs_assert(req->new_page <= 1); |
537 | ubifs_assert(req->dirtied_page <= 1); | 473 | ubifs_assert(req->dirtied_page <= 1); |
@@ -549,7 +485,6 @@ int ubifs_budget_space(struct ubifs_info *c, struct ubifs_budget_req *req) | |||
549 | if (!data_growth && !dd_growth) | 485 | if (!data_growth && !dd_growth) |
550 | return 0; | 486 | return 0; |
551 | idx_growth = calc_idx_growth(c, req); | 487 | idx_growth = calc_idx_growth(c, req); |
552 | memset(&ri, 0, sizeof(struct retries_info)); | ||
553 | 488 | ||
554 | again: | 489 | again: |
555 | spin_lock(&c->space_lock); | 490 | spin_lock(&c->space_lock); |
@@ -587,12 +522,17 @@ again: | |||
587 | return err; | 522 | return err; |
588 | } | 523 | } |
589 | 524 | ||
590 | err = make_free_space(c, &ri); | 525 | err = make_free_space(c); |
526 | cond_resched(); | ||
591 | if (err == -EAGAIN) { | 527 | if (err == -EAGAIN) { |
592 | dbg_budg("try again"); | 528 | dbg_budg("try again"); |
593 | cond_resched(); | ||
594 | goto again; | 529 | goto again; |
595 | } else if (err == -ENOSPC) { | 530 | } else if (err == -ENOSPC) { |
531 | if (!retried) { | ||
532 | retried = 1; | ||
533 | dbg_budg("-ENOSPC, but anyway try once again"); | ||
534 | goto again; | ||
535 | } | ||
596 | dbg_budg("FS is full, -ENOSPC"); | 536 | dbg_budg("FS is full, -ENOSPC"); |
597 | c->nospace = 1; | 537 | c->nospace = 1; |
598 | if (can_use_rp(c) || c->rp_size == 0) | 538 | if (can_use_rp(c) || c->rp_size == 0) |
@@ -666,7 +606,7 @@ void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req) | |||
666 | * @c: UBIFS file-system description object | 606 | * @c: UBIFS file-system description object |
667 | * | 607 | * |
668 | * This function converts budget which was allocated for a new page of data to | 608 | * This function converts budget which was allocated for a new page of data to |
669 | * the budget of changing an existing page of data. The latter is smaller then | 609 | * the budget of changing an existing page of data. The latter is smaller than |
670 | * the former, so this function only does simple re-calculation and does not | 610 | * the former, so this function only does simple re-calculation and does not |
671 | * involve any write-back. | 611 | * involve any write-back. |
672 | */ | 612 | */ |
@@ -712,9 +652,9 @@ void ubifs_release_dirty_inode_budget(struct ubifs_info *c, | |||
712 | * user-space. User-space application tend to expect that if the file-system | 652 | * user-space. User-space application tend to expect that if the file-system |
713 | * (e.g., via the 'statfs()' call) reports that it has N bytes available, they | 653 | * (e.g., via the 'statfs()' call) reports that it has N bytes available, they |
714 | * are able to write a file of size N. UBIFS attaches node headers to each data | 654 | * are able to write a file of size N. UBIFS attaches node headers to each data |
715 | * node and it has to write indexind nodes as well. This introduces additional | 655 | * node and it has to write indexing nodes as well. This introduces additional |
716 | * overhead, and UBIFS it has to report sligtly less free space to meet the | 656 | * overhead, and UBIFS has to report slightly less free space to meet the above |
717 | * above expectetion. | 657 | * expectations. |
718 | * | 658 | * |
719 | * This function assumes free space is made up of uncompressed data nodes and | 659 | * This function assumes free space is made up of uncompressed data nodes and |
720 | * full index nodes (one per data node, tripled because we always allow enough | 660 | * full index nodes (one per data node, tripled because we always allow enough |
@@ -723,7 +663,7 @@ void ubifs_release_dirty_inode_budget(struct ubifs_info *c, | |||
723 | * Note, the calculation is pessimistic, which means that most of the time | 663 | * Note, the calculation is pessimistic, which means that most of the time |
724 | * UBIFS reports less space than it actually has. | 664 | * UBIFS reports less space than it actually has. |
725 | */ | 665 | */ |
726 | long long ubifs_reported_space(const struct ubifs_info *c, uint64_t free) | 666 | long long ubifs_reported_space(const struct ubifs_info *c, long long free) |
727 | { | 667 | { |
728 | int divisor, factor, f; | 668 | int divisor, factor, f; |
729 | 669 | ||
@@ -737,7 +677,7 @@ long long ubifs_reported_space(const struct ubifs_info *c, uint64_t free) | |||
737 | * of data nodes, f - fanout. Because effective UBIFS fanout is twice | 677 | * of data nodes, f - fanout. Because effective UBIFS fanout is twice |
738 | * as less than maximum fanout, we assume that each data node | 678 | * as less than maximum fanout, we assume that each data node |
739 | * introduces 3 * @c->max_idx_node_sz / (@c->fanout/2 - 1) bytes. | 679 | * introduces 3 * @c->max_idx_node_sz / (@c->fanout/2 - 1) bytes. |
740 | * Note, the multiplier 3 is because UBIFS reseves thrice as more space | 680 | * Note, the multiplier 3 is because UBIFS reserves thrice as more space |
741 | * for the index. | 681 | * for the index. |
742 | */ | 682 | */ |
743 | f = c->fanout > 3 ? c->fanout >> 1 : 2; | 683 | f = c->fanout > 3 ? c->fanout >> 1 : 2; |
@@ -745,8 +685,7 @@ long long ubifs_reported_space(const struct ubifs_info *c, uint64_t free) | |||
745 | divisor = UBIFS_MAX_DATA_NODE_SZ; | 685 | divisor = UBIFS_MAX_DATA_NODE_SZ; |
746 | divisor += (c->max_idx_node_sz * 3) / (f - 1); | 686 | divisor += (c->max_idx_node_sz * 3) / (f - 1); |
747 | free *= factor; | 687 | free *= factor; |
748 | do_div(free, divisor); | 688 | return div_u64(free, divisor); |
749 | return free; | ||
750 | } | 689 | } |
751 | 690 | ||
752 | /** | 691 | /** |
@@ -756,10 +695,10 @@ long long ubifs_reported_space(const struct ubifs_info *c, uint64_t free) | |||
756 | * This function calculates amount of free space to report to user-space. | 695 | * This function calculates amount of free space to report to user-space. |
757 | * | 696 | * |
758 | * Because UBIFS may introduce substantial overhead (the index, node headers, | 697 | * Because UBIFS may introduce substantial overhead (the index, node headers, |
759 | * alighment, wastage at the end of eraseblocks, etc), it cannot report real | 698 | * alignment, wastage at the end of eraseblocks, etc), it cannot report real |
760 | * amount of free flash space it has (well, because not all dirty space is | 699 | * amount of free flash space it has (well, because not all dirty space is |
761 | * reclamable, UBIFS does not actually know the real amount). If UBIFS did so, | 700 | * reclaimable, UBIFS does not actually know the real amount). If UBIFS did so, |
762 | * it would bread user expectetion about what free space is. Users seem to | 701 | * it would bread user expectations about what free space is. Users seem to |
763 | * accustomed to assume that if the file-system reports N bytes of free space, | 702 | * accustomed to assume that if the file-system reports N bytes of free space, |
764 | * they would be able to fit a file of N bytes to the FS. This almost works for | 703 | * they would be able to fit a file of N bytes to the FS. This almost works for |
765 | * traditional file-systems, because they have way less overhead than UBIFS. | 704 | * traditional file-systems, because they have way less overhead than UBIFS. |
@@ -771,18 +710,9 @@ long long ubifs_get_free_space(struct ubifs_info *c) | |||
771 | long long available, outstanding, free; | 710 | long long available, outstanding, free; |
772 | 711 | ||
773 | spin_lock(&c->space_lock); | 712 | spin_lock(&c->space_lock); |
774 | min_idx_lebs = ubifs_calc_min_idx_lebs(c); | 713 | min_idx_lebs = c->min_idx_lebs; |
714 | ubifs_assert(min_idx_lebs == ubifs_calc_min_idx_lebs(c)); | ||
775 | outstanding = c->budg_data_growth + c->budg_dd_growth; | 715 | outstanding = c->budg_data_growth + c->budg_dd_growth; |
776 | |||
777 | /* | ||
778 | * Force the amount available to the total size reported if the used | ||
779 | * space is zero. | ||
780 | */ | ||
781 | if (c->lst.total_used <= UBIFS_INO_NODE_SZ && !outstanding) { | ||
782 | spin_unlock(&c->space_lock); | ||
783 | return (long long)c->block_cnt << UBIFS_BLOCK_SHIFT; | ||
784 | } | ||
785 | |||
786 | available = ubifs_calc_available(c, min_idx_lebs); | 716 | available = ubifs_calc_available(c, min_idx_lebs); |
787 | 717 | ||
788 | /* | 718 | /* |