diff options
Diffstat (limited to 'fs/ext3/balloc.c')
-rw-r--r-- | fs/ext3/balloc.c | 350 |
1 files changed, 278 insertions, 72 deletions
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c index 063d994bda0b..b41a7d7e20f0 100644 --- a/fs/ext3/balloc.c +++ b/fs/ext3/balloc.c | |||
@@ -38,6 +38,13 @@ | |||
38 | 38 | ||
39 | #define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) | 39 | #define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) |
40 | 40 | ||
41 | /** | ||
42 | * ext3_get_group_desc() -- load group descriptor from disk | ||
43 | * @sb: super block | ||
44 | * @block_group: given block group | ||
45 | * @bh: pointer to the buffer head to store the block | ||
46 | * group descriptor | ||
47 | */ | ||
41 | struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb, | 48 | struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb, |
42 | unsigned int block_group, | 49 | unsigned int block_group, |
43 | struct buffer_head ** bh) | 50 | struct buffer_head ** bh) |
@@ -73,8 +80,12 @@ struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb, | |||
73 | return desc + offset; | 80 | return desc + offset; |
74 | } | 81 | } |
75 | 82 | ||
76 | /* | 83 | /** |
77 | * Read the bitmap for a given block_group, reading into the specified | 84 | * read_block_bitmap() |
85 | * @sb: super block | ||
86 | * @block_group: given block group | ||
87 | * | ||
88 | * Read the bitmap for a given block_group, reading into the specified | ||
78 | * slot in the superblock's bitmap cache. | 89 | * slot in the superblock's bitmap cache. |
79 | * | 90 | * |
80 | * Return buffer_head on success or NULL in case of failure. | 91 | * Return buffer_head on success or NULL in case of failure. |
@@ -103,15 +114,22 @@ error_out: | |||
103 | * Operations include: | 114 | * Operations include: |
104 | * dump, find, add, remove, is_empty, find_next_reservable_window, etc. | 115 | * dump, find, add, remove, is_empty, find_next_reservable_window, etc. |
105 | * | 116 | * |
106 | * We use sorted double linked list for the per-filesystem reservation | 117 | * We use a red-black tree to represent per-filesystem reservation |
107 | * window list. (like in vm_region). | 118 | * windows. |
119 | * | ||
120 | */ | ||
121 | |||
122 | /** | ||
123 | * __rsv_window_dump() -- Dump the filesystem block allocation reservation map | ||
124 | * @rb_root: root of per-filesystem reservation rb tree | ||
125 | * @verbose: verbose mode | ||
126 | * @fn: function which wishes to dump the reservation map | ||
108 | * | 127 | * |
109 | * Initially, we keep those small operations in the abstract functions, | 128 | * If verbose is turned on, it will print the whole block reservation |
110 | * so later if we need a better searching tree than double linked-list, | 129 | * windows(start, end). Otherwise, it will only print out the "bad" windows, |
111 | * we could easily switch to that without changing too much | 130 | * those windows that overlap with their immediate neighbors. |
112 | * code. | ||
113 | */ | 131 | */ |
114 | #if 0 | 132 | #if 1 |
115 | static void __rsv_window_dump(struct rb_root *root, int verbose, | 133 | static void __rsv_window_dump(struct rb_root *root, int verbose, |
116 | const char *fn) | 134 | const char *fn) |
117 | { | 135 | { |
@@ -129,7 +147,7 @@ restart: | |||
129 | rsv = list_entry(n, struct ext3_reserve_window_node, rsv_node); | 147 | rsv = list_entry(n, struct ext3_reserve_window_node, rsv_node); |
130 | if (verbose) | 148 | if (verbose) |
131 | printk("reservation window 0x%p " | 149 | printk("reservation window 0x%p " |
132 | "start: %d, end: %d\n", | 150 | "start: %lu, end: %lu\n", |
133 | rsv, rsv->rsv_start, rsv->rsv_end); | 151 | rsv, rsv->rsv_start, rsv->rsv_end); |
134 | if (rsv->rsv_start && rsv->rsv_start >= rsv->rsv_end) { | 152 | if (rsv->rsv_start && rsv->rsv_start >= rsv->rsv_end) { |
135 | printk("Bad reservation %p (start >= end)\n", | 153 | printk("Bad reservation %p (start >= end)\n", |
@@ -161,6 +179,22 @@ restart: | |||
161 | #define rsv_window_dump(root, verbose) do {} while (0) | 179 | #define rsv_window_dump(root, verbose) do {} while (0) |
162 | #endif | 180 | #endif |
163 | 181 | ||
182 | /** | ||
183 | * goal_in_my_reservation() | ||
184 | * @rsv: inode's reservation window | ||
185 | * @grp_goal: given goal block relative to the allocation block group | ||
186 | * @group: the current allocation block group | ||
187 | * @sb: filesystem super block | ||
188 | * | ||
189 | * Test if the given goal block (group relative) is within the file's | ||
190 | * own block reservation window range. | ||
191 | * | ||
192 | * If the reservation window is outside the goal allocation group, return 0; | ||
193 | * grp_goal (given goal block) could be -1, which means no specific | ||
194 | * goal block. In this case, always return 1. | ||
195 | * If the goal block is within the reservation window, return 1; | ||
196 | * otherwise, return 0; | ||
197 | */ | ||
164 | static int | 198 | static int |
165 | goal_in_my_reservation(struct ext3_reserve_window *rsv, ext3_grpblk_t grp_goal, | 199 | goal_in_my_reservation(struct ext3_reserve_window *rsv, ext3_grpblk_t grp_goal, |
166 | unsigned int group, struct super_block * sb) | 200 | unsigned int group, struct super_block * sb) |
@@ -168,7 +202,7 @@ goal_in_my_reservation(struct ext3_reserve_window *rsv, ext3_grpblk_t grp_goal, | |||
168 | ext3_fsblk_t group_first_block, group_last_block; | 202 | ext3_fsblk_t group_first_block, group_last_block; |
169 | 203 | ||
170 | group_first_block = ext3_group_first_block_no(sb, group); | 204 | group_first_block = ext3_group_first_block_no(sb, group); |
171 | group_last_block = group_first_block + EXT3_BLOCKS_PER_GROUP(sb) - 1; | 205 | group_last_block = group_first_block + (EXT3_BLOCKS_PER_GROUP(sb) - 1); |
172 | 206 | ||
173 | if ((rsv->_rsv_start > group_last_block) || | 207 | if ((rsv->_rsv_start > group_last_block) || |
174 | (rsv->_rsv_end < group_first_block)) | 208 | (rsv->_rsv_end < group_first_block)) |
@@ -179,7 +213,11 @@ goal_in_my_reservation(struct ext3_reserve_window *rsv, ext3_grpblk_t grp_goal, | |||
179 | return 1; | 213 | return 1; |
180 | } | 214 | } |
181 | 215 | ||
182 | /* | 216 | /** |
217 | * search_reserve_window() | ||
218 | * @rb_root: root of reservation tree | ||
219 | * @goal: target allocation block | ||
220 | * | ||
183 | * Find the reserved window which includes the goal, or the previous one | 221 | * Find the reserved window which includes the goal, or the previous one |
184 | * if the goal is not in any window. | 222 | * if the goal is not in any window. |
185 | * Returns NULL if there are no windows or if all windows start after the goal. | 223 | * Returns NULL if there are no windows or if all windows start after the goal. |
@@ -216,6 +254,13 @@ search_reserve_window(struct rb_root *root, ext3_fsblk_t goal) | |||
216 | return rsv; | 254 | return rsv; |
217 | } | 255 | } |
218 | 256 | ||
257 | /** | ||
258 | * ext3_rsv_window_add() -- Insert a window to the block reservation rb tree. | ||
259 | * @sb: super block | ||
260 | * @rsv: reservation window to add | ||
261 | * | ||
262 | * Must be called with rsv_lock hold. | ||
263 | */ | ||
219 | void ext3_rsv_window_add(struct super_block *sb, | 264 | void ext3_rsv_window_add(struct super_block *sb, |
220 | struct ext3_reserve_window_node *rsv) | 265 | struct ext3_reserve_window_node *rsv) |
221 | { | 266 | { |
@@ -236,14 +281,25 @@ void ext3_rsv_window_add(struct super_block *sb, | |||
236 | p = &(*p)->rb_left; | 281 | p = &(*p)->rb_left; |
237 | else if (start > this->rsv_end) | 282 | else if (start > this->rsv_end) |
238 | p = &(*p)->rb_right; | 283 | p = &(*p)->rb_right; |
239 | else | 284 | else { |
285 | rsv_window_dump(root, 1); | ||
240 | BUG(); | 286 | BUG(); |
287 | } | ||
241 | } | 288 | } |
242 | 289 | ||
243 | rb_link_node(node, parent, p); | 290 | rb_link_node(node, parent, p); |
244 | rb_insert_color(node, root); | 291 | rb_insert_color(node, root); |
245 | } | 292 | } |
246 | 293 | ||
294 | /** | ||
295 | * ext3_rsv_window_remove() -- unlink a window from the reservation rb tree | ||
296 | * @sb: super block | ||
297 | * @rsv: reservation window to remove | ||
298 | * | ||
299 | * Mark the block reservation window as not allocated, and unlink it | ||
300 | * from the filesystem reservation window rb tree. Must be called with | ||
301 | * rsv_lock hold. | ||
302 | */ | ||
247 | static void rsv_window_remove(struct super_block *sb, | 303 | static void rsv_window_remove(struct super_block *sb, |
248 | struct ext3_reserve_window_node *rsv) | 304 | struct ext3_reserve_window_node *rsv) |
249 | { | 305 | { |
@@ -253,11 +309,39 @@ static void rsv_window_remove(struct super_block *sb, | |||
253 | rb_erase(&rsv->rsv_node, &EXT3_SB(sb)->s_rsv_window_root); | 309 | rb_erase(&rsv->rsv_node, &EXT3_SB(sb)->s_rsv_window_root); |
254 | } | 310 | } |
255 | 311 | ||
312 | /* | ||
313 | * rsv_is_empty() -- Check if the reservation window is allocated. | ||
314 | * @rsv: given reservation window to check | ||
315 | * | ||
316 | * returns 1 if the end block is EXT3_RESERVE_WINDOW_NOT_ALLOCATED. | ||
317 | */ | ||
256 | static inline int rsv_is_empty(struct ext3_reserve_window *rsv) | 318 | static inline int rsv_is_empty(struct ext3_reserve_window *rsv) |
257 | { | 319 | { |
258 | /* a valid reservation end block could not be 0 */ | 320 | /* a valid reservation end block could not be 0 */ |
259 | return (rsv->_rsv_end == EXT3_RESERVE_WINDOW_NOT_ALLOCATED); | 321 | return rsv->_rsv_end == EXT3_RESERVE_WINDOW_NOT_ALLOCATED; |
260 | } | 322 | } |
323 | |||
324 | /** | ||
325 | * ext3_init_block_alloc_info() | ||
326 | * @inode: file inode structure | ||
327 | * | ||
328 | * Allocate and initialize the reservation window structure, and | ||
329 | * link the window to the ext3 inode structure at last | ||
330 | * | ||
331 | * The reservation window structure is only dynamically allocated | ||
332 | * and linked to ext3 inode the first time the open file | ||
333 | * needs a new block. So, before every ext3_new_block(s) call, for | ||
334 | * regular files, we should check whether the reservation window | ||
335 | * structure exists or not. In the latter case, this function is called. | ||
336 | * Fail to do so will result in block reservation being turned off for that | ||
337 | * open file. | ||
338 | * | ||
339 | * This function is called from ext3_get_blocks_handle(), also called | ||
340 | * when setting the reservation window size through ioctl before the file | ||
341 | * is open for write (needs block allocation). | ||
342 | * | ||
343 | * Needs truncate_mutex protection prior to call this function. | ||
344 | */ | ||
261 | void ext3_init_block_alloc_info(struct inode *inode) | 345 | void ext3_init_block_alloc_info(struct inode *inode) |
262 | { | 346 | { |
263 | struct ext3_inode_info *ei = EXT3_I(inode); | 347 | struct ext3_inode_info *ei = EXT3_I(inode); |
@@ -271,7 +355,7 @@ void ext3_init_block_alloc_info(struct inode *inode) | |||
271 | rsv->rsv_start = EXT3_RESERVE_WINDOW_NOT_ALLOCATED; | 355 | rsv->rsv_start = EXT3_RESERVE_WINDOW_NOT_ALLOCATED; |
272 | rsv->rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED; | 356 | rsv->rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED; |
273 | 357 | ||
274 | /* | 358 | /* |
275 | * if filesystem is mounted with NORESERVATION, the goal | 359 | * if filesystem is mounted with NORESERVATION, the goal |
276 | * reservation window size is set to zero to indicate | 360 | * reservation window size is set to zero to indicate |
277 | * block reservation is off | 361 | * block reservation is off |
@@ -287,6 +371,19 @@ void ext3_init_block_alloc_info(struct inode *inode) | |||
287 | ei->i_block_alloc_info = block_i; | 371 | ei->i_block_alloc_info = block_i; |
288 | } | 372 | } |
289 | 373 | ||
374 | /** | ||
375 | * ext3_discard_reservation() | ||
376 | * @inode: inode | ||
377 | * | ||
378 | * Discard(free) block reservation window on last file close, or truncate | ||
379 | * or at last iput(). | ||
380 | * | ||
381 | * It is being called in three cases: | ||
382 | * ext3_release_file(): last writer close the file | ||
383 | * ext3_clear_inode(): last iput(), when nobody link to this file. | ||
384 | * ext3_truncate(): when the block indirect map is about to change. | ||
385 | * | ||
386 | */ | ||
290 | void ext3_discard_reservation(struct inode *inode) | 387 | void ext3_discard_reservation(struct inode *inode) |
291 | { | 388 | { |
292 | struct ext3_inode_info *ei = EXT3_I(inode); | 389 | struct ext3_inode_info *ei = EXT3_I(inode); |
@@ -306,7 +403,14 @@ void ext3_discard_reservation(struct inode *inode) | |||
306 | } | 403 | } |
307 | } | 404 | } |
308 | 405 | ||
309 | /* Free given blocks, update quota and i_blocks field */ | 406 | /** |
407 | * ext3_free_blocks_sb() -- Free given blocks and update quota | ||
408 | * @handle: handle to this transaction | ||
409 | * @sb: super block | ||
410 | * @block: start physcial block to free | ||
411 | * @count: number of blocks to free | ||
412 | * @pdquot_freed_blocks: pointer to quota | ||
413 | */ | ||
310 | void ext3_free_blocks_sb(handle_t *handle, struct super_block *sb, | 414 | void ext3_free_blocks_sb(handle_t *handle, struct super_block *sb, |
311 | ext3_fsblk_t block, unsigned long count, | 415 | ext3_fsblk_t block, unsigned long count, |
312 | unsigned long *pdquot_freed_blocks) | 416 | unsigned long *pdquot_freed_blocks) |
@@ -419,8 +523,8 @@ do_more: | |||
419 | } | 523 | } |
420 | /* @@@ This prevents newly-allocated data from being | 524 | /* @@@ This prevents newly-allocated data from being |
421 | * freed and then reallocated within the same | 525 | * freed and then reallocated within the same |
422 | * transaction. | 526 | * transaction. |
423 | * | 527 | * |
424 | * Ideally we would want to allow that to happen, but to | 528 | * Ideally we would want to allow that to happen, but to |
425 | * do so requires making journal_forget() capable of | 529 | * do so requires making journal_forget() capable of |
426 | * revoking the queued write of a data block, which | 530 | * revoking the queued write of a data block, which |
@@ -433,7 +537,7 @@ do_more: | |||
433 | * safe not to set the allocation bit in the committed | 537 | * safe not to set the allocation bit in the committed |
434 | * bitmap, because we know that there is no outstanding | 538 | * bitmap, because we know that there is no outstanding |
435 | * activity on the buffer any more and so it is safe to | 539 | * activity on the buffer any more and so it is safe to |
436 | * reallocate it. | 540 | * reallocate it. |
437 | */ | 541 | */ |
438 | BUFFER_TRACE(bitmap_bh, "set in b_committed_data"); | 542 | BUFFER_TRACE(bitmap_bh, "set in b_committed_data"); |
439 | J_ASSERT_BH(bitmap_bh, | 543 | J_ASSERT_BH(bitmap_bh, |
@@ -490,7 +594,13 @@ error_return: | |||
490 | return; | 594 | return; |
491 | } | 595 | } |
492 | 596 | ||
493 | /* Free given blocks, update quota and i_blocks field */ | 597 | /** |
598 | * ext3_free_blocks() -- Free given blocks and update quota | ||
599 | * @handle: handle for this transaction | ||
600 | * @inode: inode | ||
601 | * @block: start physical block to free | ||
602 | * @count: number of blocks to count | ||
603 | */ | ||
494 | void ext3_free_blocks(handle_t *handle, struct inode *inode, | 604 | void ext3_free_blocks(handle_t *handle, struct inode *inode, |
495 | ext3_fsblk_t block, unsigned long count) | 605 | ext3_fsblk_t block, unsigned long count) |
496 | { | 606 | { |
@@ -508,7 +618,11 @@ void ext3_free_blocks(handle_t *handle, struct inode *inode, | |||
508 | return; | 618 | return; |
509 | } | 619 | } |
510 | 620 | ||
511 | /* | 621 | /** |
622 | * ext3_test_allocatable() | ||
623 | * @nr: given allocation block group | ||
624 | * @bh: bufferhead contains the bitmap of the given block group | ||
625 | * | ||
512 | * For ext3 allocations, we must not reuse any blocks which are | 626 | * For ext3 allocations, we must not reuse any blocks which are |
513 | * allocated in the bitmap buffer's "last committed data" copy. This | 627 | * allocated in the bitmap buffer's "last committed data" copy. This |
514 | * prevents deletes from freeing up the page for reuse until we have | 628 | * prevents deletes from freeing up the page for reuse until we have |
@@ -518,7 +632,7 @@ void ext3_free_blocks(handle_t *handle, struct inode *inode, | |||
518 | * data would allow the old block to be overwritten before the | 632 | * data would allow the old block to be overwritten before the |
519 | * transaction committed (because we force data to disk before commit). | 633 | * transaction committed (because we force data to disk before commit). |
520 | * This would lead to corruption if we crashed between overwriting the | 634 | * This would lead to corruption if we crashed between overwriting the |
521 | * data and committing the delete. | 635 | * data and committing the delete. |
522 | * | 636 | * |
523 | * @@@ We may want to make this allocation behaviour conditional on | 637 | * @@@ We may want to make this allocation behaviour conditional on |
524 | * data-writes at some point, and disable it for metadata allocations or | 638 | * data-writes at some point, and disable it for metadata allocations or |
@@ -541,6 +655,16 @@ static int ext3_test_allocatable(ext3_grpblk_t nr, struct buffer_head *bh) | |||
541 | return ret; | 655 | return ret; |
542 | } | 656 | } |
543 | 657 | ||
658 | /** | ||
659 | * bitmap_search_next_usable_block() | ||
660 | * @start: the starting block (group relative) of the search | ||
661 | * @bh: bufferhead contains the block group bitmap | ||
662 | * @maxblocks: the ending block (group relative) of the reservation | ||
663 | * | ||
664 | * The bitmap search --- search forward alternately through the actual | ||
665 | * bitmap on disk and the last-committed copy in journal, until we find a | ||
666 | * bit free in both bitmaps. | ||
667 | */ | ||
544 | static ext3_grpblk_t | 668 | static ext3_grpblk_t |
545 | bitmap_search_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh, | 669 | bitmap_search_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh, |
546 | ext3_grpblk_t maxblocks) | 670 | ext3_grpblk_t maxblocks) |
@@ -548,11 +672,6 @@ bitmap_search_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh, | |||
548 | ext3_grpblk_t next; | 672 | ext3_grpblk_t next; |
549 | struct journal_head *jh = bh2jh(bh); | 673 | struct journal_head *jh = bh2jh(bh); |
550 | 674 | ||
551 | /* | ||
552 | * The bitmap search --- search forward alternately through the actual | ||
553 | * bitmap and the last-committed copy until we find a bit free in | ||
554 | * both | ||
555 | */ | ||
556 | while (start < maxblocks) { | 675 | while (start < maxblocks) { |
557 | next = ext3_find_next_zero_bit(bh->b_data, maxblocks, start); | 676 | next = ext3_find_next_zero_bit(bh->b_data, maxblocks, start); |
558 | if (next >= maxblocks) | 677 | if (next >= maxblocks) |
@@ -562,14 +681,20 @@ bitmap_search_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh, | |||
562 | jbd_lock_bh_state(bh); | 681 | jbd_lock_bh_state(bh); |
563 | if (jh->b_committed_data) | 682 | if (jh->b_committed_data) |
564 | start = ext3_find_next_zero_bit(jh->b_committed_data, | 683 | start = ext3_find_next_zero_bit(jh->b_committed_data, |
565 | maxblocks, next); | 684 | maxblocks, next); |
566 | jbd_unlock_bh_state(bh); | 685 | jbd_unlock_bh_state(bh); |
567 | } | 686 | } |
568 | return -1; | 687 | return -1; |
569 | } | 688 | } |
570 | 689 | ||
571 | /* | 690 | /** |
572 | * Find an allocatable block in a bitmap. We honour both the bitmap and | 691 | * find_next_usable_block() |
692 | * @start: the starting block (group relative) to find next | ||
693 | * allocatable block in bitmap. | ||
694 | * @bh: bufferhead contains the block group bitmap | ||
695 | * @maxblocks: the ending block (group relative) for the search | ||
696 | * | ||
697 | * Find an allocatable block in a bitmap. We honor both the bitmap and | ||
573 | * its last-committed copy (if that exists), and perform the "most | 698 | * its last-committed copy (if that exists), and perform the "most |
574 | * appropriate allocation" algorithm of looking for a free block near | 699 | * appropriate allocation" algorithm of looking for a free block near |
575 | * the initial goal; then for a free byte somewhere in the bitmap; then | 700 | * the initial goal; then for a free byte somewhere in the bitmap; then |
@@ -584,7 +709,7 @@ find_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh, | |||
584 | 709 | ||
585 | if (start > 0) { | 710 | if (start > 0) { |
586 | /* | 711 | /* |
587 | * The goal was occupied; search forward for a free | 712 | * The goal was occupied; search forward for a free |
588 | * block within the next XX blocks. | 713 | * block within the next XX blocks. |
589 | * | 714 | * |
590 | * end_goal is more or less random, but it has to be | 715 | * end_goal is more or less random, but it has to be |
@@ -620,7 +745,11 @@ find_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh, | |||
620 | return here; | 745 | return here; |
621 | } | 746 | } |
622 | 747 | ||
623 | /* | 748 | /** |
749 | * claim_block() | ||
750 | * @block: the free block (group relative) to allocate | ||
751 | * @bh: the bufferhead containts the block group bitmap | ||
752 | * | ||
624 | * We think we can allocate this block in this bitmap. Try to set the bit. | 753 | * We think we can allocate this block in this bitmap. Try to set the bit. |
625 | * If that succeeds then check that nobody has allocated and then freed the | 754 | * If that succeeds then check that nobody has allocated and then freed the |
626 | * block since we saw that is was not marked in b_committed_data. If it _was_ | 755 | * block since we saw that is was not marked in b_committed_data. If it _was_ |
@@ -646,7 +775,26 @@ claim_block(spinlock_t *lock, ext3_grpblk_t block, struct buffer_head *bh) | |||
646 | return ret; | 775 | return ret; |
647 | } | 776 | } |
648 | 777 | ||
649 | /* | 778 | /** |
779 | * ext3_try_to_allocate() | ||
780 | * @sb: superblock | ||
781 | * @handle: handle to this transaction | ||
782 | * @group: given allocation block group | ||
783 | * @bitmap_bh: bufferhead holds the block bitmap | ||
784 | * @grp_goal: given target block within the group | ||
785 | * @count: target number of blocks to allocate | ||
786 | * @my_rsv: reservation window | ||
787 | * | ||
788 | * Attempt to allocate blocks within a give range. Set the range of allocation | ||
789 | * first, then find the first free bit(s) from the bitmap (within the range), | ||
790 | * and at last, allocate the blocks by claiming the found free bit as allocated. | ||
791 | * | ||
792 | * To set the range of this allocation: | ||
793 | * if there is a reservation window, only try to allocate block(s) from the | ||
794 | * file's own reservation window; | ||
795 | * Otherwise, the allocation range starts from the give goal block, ends at | ||
796 | * the block group's last block. | ||
797 | * | ||
650 | * If we failed to allocate the desired block then we may end up crossing to a | 798 | * If we failed to allocate the desired block then we may end up crossing to a |
651 | * new bitmap. In that case we must release write access to the old one via | 799 | * new bitmap. In that case we must release write access to the old one via |
652 | * ext3_journal_release_buffer(), else we'll run out of credits. | 800 | * ext3_journal_release_buffer(), else we'll run out of credits. |
@@ -703,7 +851,8 @@ repeat: | |||
703 | } | 851 | } |
704 | start = grp_goal; | 852 | start = grp_goal; |
705 | 853 | ||
706 | if (!claim_block(sb_bgl_lock(EXT3_SB(sb), group), grp_goal, bitmap_bh)) { | 854 | if (!claim_block(sb_bgl_lock(EXT3_SB(sb), group), |
855 | grp_goal, bitmap_bh)) { | ||
707 | /* | 856 | /* |
708 | * The block was allocated by another thread, or it was | 857 | * The block was allocated by another thread, or it was |
709 | * allocated and then freed by another thread | 858 | * allocated and then freed by another thread |
@@ -718,7 +867,8 @@ repeat: | |||
718 | grp_goal++; | 867 | grp_goal++; |
719 | while (num < *count && grp_goal < end | 868 | while (num < *count && grp_goal < end |
720 | && ext3_test_allocatable(grp_goal, bitmap_bh) | 869 | && ext3_test_allocatable(grp_goal, bitmap_bh) |
721 | && claim_block(sb_bgl_lock(EXT3_SB(sb), group), grp_goal, bitmap_bh)) { | 870 | && claim_block(sb_bgl_lock(EXT3_SB(sb), group), |
871 | grp_goal, bitmap_bh)) { | ||
722 | num++; | 872 | num++; |
723 | grp_goal++; | 873 | grp_goal++; |
724 | } | 874 | } |
@@ -730,12 +880,12 @@ fail_access: | |||
730 | } | 880 | } |
731 | 881 | ||
732 | /** | 882 | /** |
733 | * find_next_reservable_window(): | 883 | * find_next_reservable_window(): |
734 | * find a reservable space within the given range. | 884 | * find a reservable space within the given range. |
735 | * It does not allocate the reservation window for now: | 885 | * It does not allocate the reservation window for now: |
736 | * alloc_new_reservation() will do the work later. | 886 | * alloc_new_reservation() will do the work later. |
737 | * | 887 | * |
738 | * @search_head: the head of the searching list; | 888 | * @search_head: the head of the searching list; |
739 | * This is not necessarily the list head of the whole filesystem | 889 | * This is not necessarily the list head of the whole filesystem |
740 | * | 890 | * |
741 | * We have both head and start_block to assist the search | 891 | * We have both head and start_block to assist the search |
@@ -743,12 +893,12 @@ fail_access: | |||
743 | * but we will shift to the place where start_block is, | 893 | * but we will shift to the place where start_block is, |
744 | * then start from there, when looking for a reservable space. | 894 | * then start from there, when looking for a reservable space. |
745 | * | 895 | * |
746 | * @size: the target new reservation window size | 896 | * @size: the target new reservation window size |
747 | * | 897 | * |
748 | * @group_first_block: the first block we consider to start | 898 | * @group_first_block: the first block we consider to start |
749 | * the real search from | 899 | * the real search from |
750 | * | 900 | * |
751 | * @last_block: | 901 | * @last_block: |
752 | * the maximum block number that our goal reservable space | 902 | * the maximum block number that our goal reservable space |
753 | * could start from. This is normally the last block in this | 903 | * could start from. This is normally the last block in this |
754 | * group. The search will end when we found the start of next | 904 | * group. The search will end when we found the start of next |
@@ -756,10 +906,10 @@ fail_access: | |||
756 | * This could handle the cross boundary reservation window | 906 | * This could handle the cross boundary reservation window |
757 | * request. | 907 | * request. |
758 | * | 908 | * |
759 | * basically we search from the given range, rather than the whole | 909 | * basically we search from the given range, rather than the whole |
760 | * reservation double linked list, (start_block, last_block) | 910 | * reservation double linked list, (start_block, last_block) |
761 | * to find a free region that is of my size and has not | 911 | * to find a free region that is of my size and has not |
762 | * been reserved. | 912 | * been reserved. |
763 | * | 913 | * |
764 | */ | 914 | */ |
765 | static int find_next_reservable_window( | 915 | static int find_next_reservable_window( |
@@ -812,7 +962,7 @@ static int find_next_reservable_window( | |||
812 | /* | 962 | /* |
813 | * Found a reserveable space big enough. We could | 963 | * Found a reserveable space big enough. We could |
814 | * have a reservation across the group boundary here | 964 | * have a reservation across the group boundary here |
815 | */ | 965 | */ |
816 | break; | 966 | break; |
817 | } | 967 | } |
818 | } | 968 | } |
@@ -848,7 +998,7 @@ static int find_next_reservable_window( | |||
848 | } | 998 | } |
849 | 999 | ||
850 | /** | 1000 | /** |
851 | * alloc_new_reservation()--allocate a new reservation window | 1001 | * alloc_new_reservation()--allocate a new reservation window |
852 | * | 1002 | * |
853 | * To make a new reservation, we search part of the filesystem | 1003 | * To make a new reservation, we search part of the filesystem |
854 | * reservation list (the list that inside the group). We try to | 1004 | * reservation list (the list that inside the group). We try to |
@@ -897,7 +1047,7 @@ static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv, | |||
897 | spinlock_t *rsv_lock = &EXT3_SB(sb)->s_rsv_window_lock; | 1047 | spinlock_t *rsv_lock = &EXT3_SB(sb)->s_rsv_window_lock; |
898 | 1048 | ||
899 | group_first_block = ext3_group_first_block_no(sb, group); | 1049 | group_first_block = ext3_group_first_block_no(sb, group); |
900 | group_end_block = group_first_block + EXT3_BLOCKS_PER_GROUP(sb) - 1; | 1050 | group_end_block = group_first_block + (EXT3_BLOCKS_PER_GROUP(sb) - 1); |
901 | 1051 | ||
902 | if (grp_goal < 0) | 1052 | if (grp_goal < 0) |
903 | start_block = group_first_block; | 1053 | start_block = group_first_block; |
@@ -929,9 +1079,10 @@ static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv, | |||
929 | if ((my_rsv->rsv_alloc_hit > | 1079 | if ((my_rsv->rsv_alloc_hit > |
930 | (my_rsv->rsv_end - my_rsv->rsv_start + 1) / 2)) { | 1080 | (my_rsv->rsv_end - my_rsv->rsv_start + 1) / 2)) { |
931 | /* | 1081 | /* |
932 | * if we previously allocation hit ration is greater than half | 1082 | * if the previously allocation hit ratio is |
933 | * we double the size of reservation window next time | 1083 | * greater than 1/2, then we double the size of |
934 | * otherwise keep the same | 1084 | * the reservation window the next time, |
1085 | * otherwise we keep the same size window | ||
935 | */ | 1086 | */ |
936 | size = size * 2; | 1087 | size = size * 2; |
937 | if (size > EXT3_MAX_RESERVE_BLOCKS) | 1088 | if (size > EXT3_MAX_RESERVE_BLOCKS) |
@@ -1010,6 +1161,23 @@ retry: | |||
1010 | goto retry; | 1161 | goto retry; |
1011 | } | 1162 | } |
1012 | 1163 | ||
1164 | /** | ||
1165 | * try_to_extend_reservation() | ||
1166 | * @my_rsv: given reservation window | ||
1167 | * @sb: super block | ||
1168 | * @size: the delta to extend | ||
1169 | * | ||
1170 | * Attempt to expand the reservation window large enough to have | ||
1171 | * required number of free blocks | ||
1172 | * | ||
1173 | * Since ext3_try_to_allocate() will always allocate blocks within | ||
1174 | * the reservation window range, if the window size is too small, | ||
1175 | * multiple blocks allocation has to stop at the end of the reservation | ||
1176 | * window. To make this more efficient, given the total number of | ||
1177 | * blocks needed and the current size of the window, we try to | ||
1178 | * expand the reservation window size if necessary on a best-effort | ||
1179 | * basis before ext3_new_blocks() tries to allocate blocks, | ||
1180 | */ | ||
1013 | static void try_to_extend_reservation(struct ext3_reserve_window_node *my_rsv, | 1181 | static void try_to_extend_reservation(struct ext3_reserve_window_node *my_rsv, |
1014 | struct super_block *sb, int size) | 1182 | struct super_block *sb, int size) |
1015 | { | 1183 | { |
@@ -1035,7 +1203,17 @@ static void try_to_extend_reservation(struct ext3_reserve_window_node *my_rsv, | |||
1035 | spin_unlock(rsv_lock); | 1203 | spin_unlock(rsv_lock); |
1036 | } | 1204 | } |
1037 | 1205 | ||
1038 | /* | 1206 | /** |
1207 | * ext3_try_to_allocate_with_rsv() | ||
1208 | * @sb: superblock | ||
1209 | * @handle: handle to this transaction | ||
1210 | * @group: given allocation block group | ||
1211 | * @bitmap_bh: bufferhead holds the block bitmap | ||
1212 | * @grp_goal: given target block within the group | ||
1213 | * @count: target number of blocks to allocate | ||
1214 | * @my_rsv: reservation window | ||
1215 | * @errp: pointer to store the error code | ||
1216 | * | ||
1039 | * This is the main function used to allocate a new block and its reservation | 1217 | * This is the main function used to allocate a new block and its reservation |
1040 | * window. | 1218 | * window. |
1041 | * | 1219 | * |
@@ -1051,9 +1229,7 @@ static void try_to_extend_reservation(struct ext3_reserve_window_node *my_rsv, | |||
1051 | * reservation), and there are lots of free blocks, but they are all | 1229 | * reservation), and there are lots of free blocks, but they are all |
1052 | * being reserved. | 1230 | * being reserved. |
1053 | * | 1231 | * |
1054 | * We use a sorted double linked list for the per-filesystem reservation list. | 1232 | * We use a red-black tree for the per-filesystem reservation list. |
1055 | * The insert, remove and find a free space(non-reserved) operations for the | ||
1056 | * sorted double linked list should be fast. | ||
1057 | * | 1233 | * |
1058 | */ | 1234 | */ |
1059 | static ext3_grpblk_t | 1235 | static ext3_grpblk_t |
@@ -1063,7 +1239,7 @@ ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle, | |||
1063 | struct ext3_reserve_window_node * my_rsv, | 1239 | struct ext3_reserve_window_node * my_rsv, |
1064 | unsigned long *count, int *errp) | 1240 | unsigned long *count, int *errp) |
1065 | { | 1241 | { |
1066 | ext3_fsblk_t group_first_block; | 1242 | ext3_fsblk_t group_first_block, group_last_block; |
1067 | ext3_grpblk_t ret = 0; | 1243 | ext3_grpblk_t ret = 0; |
1068 | int fatal; | 1244 | int fatal; |
1069 | unsigned long num = *count; | 1245 | unsigned long num = *count; |
@@ -1100,6 +1276,7 @@ ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle, | |||
1100 | * first block is the block number of the first block in this group | 1276 | * first block is the block number of the first block in this group |
1101 | */ | 1277 | */ |
1102 | group_first_block = ext3_group_first_block_no(sb, group); | 1278 | group_first_block = ext3_group_first_block_no(sb, group); |
1279 | group_last_block = group_first_block + (EXT3_BLOCKS_PER_GROUP(sb) - 1); | ||
1103 | 1280 | ||
1104 | /* | 1281 | /* |
1105 | * Basically we will allocate a new block from inode's reservation | 1282 | * Basically we will allocate a new block from inode's reservation |
@@ -1118,7 +1295,8 @@ ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle, | |||
1118 | */ | 1295 | */ |
1119 | while (1) { | 1296 | while (1) { |
1120 | if (rsv_is_empty(&my_rsv->rsv_window) || (ret < 0) || | 1297 | if (rsv_is_empty(&my_rsv->rsv_window) || (ret < 0) || |
1121 | !goal_in_my_reservation(&my_rsv->rsv_window, grp_goal, group, sb)) { | 1298 | !goal_in_my_reservation(&my_rsv->rsv_window, |
1299 | grp_goal, group, sb)) { | ||
1122 | if (my_rsv->rsv_goal_size < *count) | 1300 | if (my_rsv->rsv_goal_size < *count) |
1123 | my_rsv->rsv_goal_size = *count; | 1301 | my_rsv->rsv_goal_size = *count; |
1124 | ret = alloc_new_reservation(my_rsv, grp_goal, sb, | 1302 | ret = alloc_new_reservation(my_rsv, grp_goal, sb, |
@@ -1126,17 +1304,21 @@ ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle, | |||
1126 | if (ret < 0) | 1304 | if (ret < 0) |
1127 | break; /* failed */ | 1305 | break; /* failed */ |
1128 | 1306 | ||
1129 | if (!goal_in_my_reservation(&my_rsv->rsv_window, grp_goal, group, sb)) | 1307 | if (!goal_in_my_reservation(&my_rsv->rsv_window, |
1308 | grp_goal, group, sb)) | ||
1130 | grp_goal = -1; | 1309 | grp_goal = -1; |
1131 | } else if (grp_goal > 0 && (my_rsv->rsv_end-grp_goal+1) < *count) | 1310 | } else if (grp_goal > 0 && |
1311 | (my_rsv->rsv_end-grp_goal+1) < *count) | ||
1132 | try_to_extend_reservation(my_rsv, sb, | 1312 | try_to_extend_reservation(my_rsv, sb, |
1133 | *count-my_rsv->rsv_end + grp_goal - 1); | 1313 | *count-my_rsv->rsv_end + grp_goal - 1); |
1134 | 1314 | ||
1135 | if ((my_rsv->rsv_start >= group_first_block + EXT3_BLOCKS_PER_GROUP(sb)) | 1315 | if ((my_rsv->rsv_start > group_last_block) || |
1136 | || (my_rsv->rsv_end < group_first_block)) | 1316 | (my_rsv->rsv_end < group_first_block)) { |
1317 | rsv_window_dump(&EXT3_SB(sb)->s_rsv_window_root, 1); | ||
1137 | BUG(); | 1318 | BUG(); |
1138 | ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh, grp_goal, | 1319 | } |
1139 | &num, &my_rsv->rsv_window); | 1320 | ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh, |
1321 | grp_goal, &num, &my_rsv->rsv_window); | ||
1140 | if (ret >= 0) { | 1322 | if (ret >= 0) { |
1141 | my_rsv->rsv_alloc_hit += num; | 1323 | my_rsv->rsv_alloc_hit += num; |
1142 | *count = num; | 1324 | *count = num; |
@@ -1161,6 +1343,12 @@ out: | |||
1161 | return ret; | 1343 | return ret; |
1162 | } | 1344 | } |
1163 | 1345 | ||
1346 | /** | ||
1347 | * ext3_has_free_blocks() | ||
1348 | * @sbi: in-core super block structure. | ||
1349 | * | ||
1350 | * Check if filesystem has at least 1 free block available for allocation. | ||
1351 | */ | ||
1164 | static int ext3_has_free_blocks(struct ext3_sb_info *sbi) | 1352 | static int ext3_has_free_blocks(struct ext3_sb_info *sbi) |
1165 | { | 1353 | { |
1166 | ext3_fsblk_t free_blocks, root_blocks; | 1354 | ext3_fsblk_t free_blocks, root_blocks; |
@@ -1175,11 +1363,17 @@ static int ext3_has_free_blocks(struct ext3_sb_info *sbi) | |||
1175 | return 1; | 1363 | return 1; |
1176 | } | 1364 | } |
1177 | 1365 | ||
1178 | /* | 1366 | /** |
1367 | * ext3_should_retry_alloc() | ||
1368 | * @sb: super block | ||
1369 | * @retries number of attemps has been made | ||
1370 | * | ||
1179 | * ext3_should_retry_alloc() is called when ENOSPC is returned, and if | 1371 | * ext3_should_retry_alloc() is called when ENOSPC is returned, and if |
1180 | * it is profitable to retry the operation, this function will wait | 1372 | * it is profitable to retry the operation, this function will wait |
1181 | * for the current or commiting transaction to complete, and then | 1373 | * for the current or commiting transaction to complete, and then |
1182 | * return TRUE. | 1374 | * return TRUE. |
1375 | * | ||
1376 | * if the total number of retries exceed three times, return FALSE. | ||
1183 | */ | 1377 | */ |
1184 | int ext3_should_retry_alloc(struct super_block *sb, int *retries) | 1378 | int ext3_should_retry_alloc(struct super_block *sb, int *retries) |
1185 | { | 1379 | { |
@@ -1191,13 +1385,19 @@ int ext3_should_retry_alloc(struct super_block *sb, int *retries) | |||
1191 | return journal_force_commit_nested(EXT3_SB(sb)->s_journal); | 1385 | return journal_force_commit_nested(EXT3_SB(sb)->s_journal); |
1192 | } | 1386 | } |
1193 | 1387 | ||
1194 | /* | 1388 | /** |
1195 | * ext3_new_block uses a goal block to assist allocation. If the goal is | 1389 | * ext3_new_blocks() -- core block(s) allocation function |
1196 | * free, or there is a free block within 32 blocks of the goal, that block | 1390 | * @handle: handle to this transaction |
1197 | * is allocated. Otherwise a forward search is made for a free block; within | 1391 | * @inode: file inode |
1198 | * each block group the search first looks for an entire free byte in the block | 1392 | * @goal: given target block(filesystem wide) |
1199 | * bitmap, and then for any free bit if that fails. | 1393 | * @count: target number of blocks to allocate |
1200 | * This function also updates quota and i_blocks field. | 1394 | * @errp: error code |
1395 | * | ||
1396 | * ext3_new_blocks uses a goal block to assist allocation. It tries to | ||
1397 | * allocate block(s) from the block group contains the goal block first. If that | ||
1398 | * fails, it will try to allocate block(s) from other block groups without | ||
1399 | * any specific goal block. | ||
1400 | * | ||
1201 | */ | 1401 | */ |
1202 | ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode, | 1402 | ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode, |
1203 | ext3_fsblk_t goal, unsigned long *count, int *errp) | 1403 | ext3_fsblk_t goal, unsigned long *count, int *errp) |
@@ -1303,7 +1503,7 @@ retry_alloc: | |||
1303 | smp_rmb(); | 1503 | smp_rmb(); |
1304 | 1504 | ||
1305 | /* | 1505 | /* |
1306 | * Now search the rest of the groups. We assume that | 1506 | * Now search the rest of the groups. We assume that |
1307 | * i and gdp correctly point to the last group visited. | 1507 | * i and gdp correctly point to the last group visited. |
1308 | */ | 1508 | */ |
1309 | for (bgi = 0; bgi < ngroups; bgi++) { | 1509 | for (bgi = 0; bgi < ngroups; bgi++) { |
@@ -1428,7 +1628,7 @@ allocated: | |||
1428 | 1628 | ||
1429 | spin_lock(sb_bgl_lock(sbi, group_no)); | 1629 | spin_lock(sb_bgl_lock(sbi, group_no)); |
1430 | gdp->bg_free_blocks_count = | 1630 | gdp->bg_free_blocks_count = |
1431 | cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) - num); | 1631 | cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count)-num); |
1432 | spin_unlock(sb_bgl_lock(sbi, group_no)); | 1632 | spin_unlock(sb_bgl_lock(sbi, group_no)); |
1433 | percpu_counter_mod(&sbi->s_freeblocks_counter, -num); | 1633 | percpu_counter_mod(&sbi->s_freeblocks_counter, -num); |
1434 | 1634 | ||
@@ -1471,6 +1671,12 @@ ext3_fsblk_t ext3_new_block(handle_t *handle, struct inode *inode, | |||
1471 | return ext3_new_blocks(handle, inode, goal, &count, errp); | 1671 | return ext3_new_blocks(handle, inode, goal, &count, errp); |
1472 | } | 1672 | } |
1473 | 1673 | ||
1674 | /** | ||
1675 | * ext3_count_free_blocks() -- count filesystem free blocks | ||
1676 | * @sb: superblock | ||
1677 | * | ||
1678 | * Adds up the number of free blocks from each block group. | ||
1679 | */ | ||
1474 | ext3_fsblk_t ext3_count_free_blocks(struct super_block *sb) | 1680 | ext3_fsblk_t ext3_count_free_blocks(struct super_block *sb) |
1475 | { | 1681 | { |
1476 | ext3_fsblk_t desc_count; | 1682 | ext3_fsblk_t desc_count; |