aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext3/balloc.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext3/balloc.c')
-rw-r--r--fs/ext3/balloc.c350
1 files changed, 278 insertions, 72 deletions
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index 063d994bda0b..b41a7d7e20f0 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -38,6 +38,13 @@
38 38
39#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) 39#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
40 40
41/**
42 * ext3_get_group_desc() -- load group descriptor from disk
43 * @sb: super block
44 * @block_group: given block group
45 * @bh: pointer to the buffer head to store the block
46 * group descriptor
47 */
41struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb, 48struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb,
42 unsigned int block_group, 49 unsigned int block_group,
43 struct buffer_head ** bh) 50 struct buffer_head ** bh)
@@ -73,8 +80,12 @@ struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb,
73 return desc + offset; 80 return desc + offset;
74} 81}
75 82
76/* 83/**
77 * Read the bitmap for a given block_group, reading into the specified 84 * read_block_bitmap()
85 * @sb: super block
86 * @block_group: given block group
87 *
88 * Read the bitmap for a given block_group, reading into the specified
78 * slot in the superblock's bitmap cache. 89 * slot in the superblock's bitmap cache.
79 * 90 *
80 * Return buffer_head on success or NULL in case of failure. 91 * Return buffer_head on success or NULL in case of failure.
@@ -103,15 +114,22 @@ error_out:
103 * Operations include: 114 * Operations include:
104 * dump, find, add, remove, is_empty, find_next_reservable_window, etc. 115 * dump, find, add, remove, is_empty, find_next_reservable_window, etc.
105 * 116 *
106 * We use sorted double linked list for the per-filesystem reservation 117 * We use a red-black tree to represent per-filesystem reservation
107 * window list. (like in vm_region). 118 * windows.
119 *
120 */
121
122/**
123 * __rsv_window_dump() -- Dump the filesystem block allocation reservation map
124 * @rb_root: root of per-filesystem reservation rb tree
125 * @verbose: verbose mode
126 * @fn: function which wishes to dump the reservation map
108 * 127 *
109 * Initially, we keep those small operations in the abstract functions, 128 * If verbose is turned on, it will print the whole block reservation
110 * so later if we need a better searching tree than double linked-list, 129 * windows(start, end). Otherwise, it will only print out the "bad" windows,
111 * we could easily switch to that without changing too much 130 * those windows that overlap with their immediate neighbors.
112 * code.
113 */ 131 */
114#if 0 132#if 1
115static void __rsv_window_dump(struct rb_root *root, int verbose, 133static void __rsv_window_dump(struct rb_root *root, int verbose,
116 const char *fn) 134 const char *fn)
117{ 135{
@@ -129,7 +147,7 @@ restart:
129 rsv = list_entry(n, struct ext3_reserve_window_node, rsv_node); 147 rsv = list_entry(n, struct ext3_reserve_window_node, rsv_node);
130 if (verbose) 148 if (verbose)
131 printk("reservation window 0x%p " 149 printk("reservation window 0x%p "
132 "start: %d, end: %d\n", 150 "start: %lu, end: %lu\n",
133 rsv, rsv->rsv_start, rsv->rsv_end); 151 rsv, rsv->rsv_start, rsv->rsv_end);
134 if (rsv->rsv_start && rsv->rsv_start >= rsv->rsv_end) { 152 if (rsv->rsv_start && rsv->rsv_start >= rsv->rsv_end) {
135 printk("Bad reservation %p (start >= end)\n", 153 printk("Bad reservation %p (start >= end)\n",
@@ -161,6 +179,22 @@ restart:
161#define rsv_window_dump(root, verbose) do {} while (0) 179#define rsv_window_dump(root, verbose) do {} while (0)
162#endif 180#endif
163 181
182/**
183 * goal_in_my_reservation()
184 * @rsv: inode's reservation window
185 * @grp_goal: given goal block relative to the allocation block group
186 * @group: the current allocation block group
187 * @sb: filesystem super block
188 *
189 * Test if the given goal block (group relative) is within the file's
190 * own block reservation window range.
191 *
192 * If the reservation window is outside the goal allocation group, return 0;
193 * grp_goal (given goal block) could be -1, which means no specific
194 * goal block. In this case, always return 1.
195 * If the goal block is within the reservation window, return 1;
196 * otherwise, return 0;
197 */
164static int 198static int
165goal_in_my_reservation(struct ext3_reserve_window *rsv, ext3_grpblk_t grp_goal, 199goal_in_my_reservation(struct ext3_reserve_window *rsv, ext3_grpblk_t grp_goal,
166 unsigned int group, struct super_block * sb) 200 unsigned int group, struct super_block * sb)
@@ -168,7 +202,7 @@ goal_in_my_reservation(struct ext3_reserve_window *rsv, ext3_grpblk_t grp_goal,
168 ext3_fsblk_t group_first_block, group_last_block; 202 ext3_fsblk_t group_first_block, group_last_block;
169 203
170 group_first_block = ext3_group_first_block_no(sb, group); 204 group_first_block = ext3_group_first_block_no(sb, group);
171 group_last_block = group_first_block + EXT3_BLOCKS_PER_GROUP(sb) - 1; 205 group_last_block = group_first_block + (EXT3_BLOCKS_PER_GROUP(sb) - 1);
172 206
173 if ((rsv->_rsv_start > group_last_block) || 207 if ((rsv->_rsv_start > group_last_block) ||
174 (rsv->_rsv_end < group_first_block)) 208 (rsv->_rsv_end < group_first_block))
@@ -179,7 +213,11 @@ goal_in_my_reservation(struct ext3_reserve_window *rsv, ext3_grpblk_t grp_goal,
179 return 1; 213 return 1;
180} 214}
181 215
182/* 216/**
217 * search_reserve_window()
218 * @rb_root: root of reservation tree
219 * @goal: target allocation block
220 *
183 * Find the reserved window which includes the goal, or the previous one 221 * Find the reserved window which includes the goal, or the previous one
184 * if the goal is not in any window. 222 * if the goal is not in any window.
185 * Returns NULL if there are no windows or if all windows start after the goal. 223 * Returns NULL if there are no windows or if all windows start after the goal.
@@ -216,6 +254,13 @@ search_reserve_window(struct rb_root *root, ext3_fsblk_t goal)
216 return rsv; 254 return rsv;
217} 255}
218 256
257/**
258 * ext3_rsv_window_add() -- Insert a window to the block reservation rb tree.
259 * @sb: super block
260 * @rsv: reservation window to add
261 *
262 * Must be called with rsv_lock hold.
263 */
219void ext3_rsv_window_add(struct super_block *sb, 264void ext3_rsv_window_add(struct super_block *sb,
220 struct ext3_reserve_window_node *rsv) 265 struct ext3_reserve_window_node *rsv)
221{ 266{
@@ -236,14 +281,25 @@ void ext3_rsv_window_add(struct super_block *sb,
236 p = &(*p)->rb_left; 281 p = &(*p)->rb_left;
237 else if (start > this->rsv_end) 282 else if (start > this->rsv_end)
238 p = &(*p)->rb_right; 283 p = &(*p)->rb_right;
239 else 284 else {
285 rsv_window_dump(root, 1);
240 BUG(); 286 BUG();
287 }
241 } 288 }
242 289
243 rb_link_node(node, parent, p); 290 rb_link_node(node, parent, p);
244 rb_insert_color(node, root); 291 rb_insert_color(node, root);
245} 292}
246 293
294/**
295 * ext3_rsv_window_remove() -- unlink a window from the reservation rb tree
296 * @sb: super block
297 * @rsv: reservation window to remove
298 *
299 * Mark the block reservation window as not allocated, and unlink it
300 * from the filesystem reservation window rb tree. Must be called with
301 * rsv_lock hold.
302 */
247static void rsv_window_remove(struct super_block *sb, 303static void rsv_window_remove(struct super_block *sb,
248 struct ext3_reserve_window_node *rsv) 304 struct ext3_reserve_window_node *rsv)
249{ 305{
@@ -253,11 +309,39 @@ static void rsv_window_remove(struct super_block *sb,
253 rb_erase(&rsv->rsv_node, &EXT3_SB(sb)->s_rsv_window_root); 309 rb_erase(&rsv->rsv_node, &EXT3_SB(sb)->s_rsv_window_root);
254} 310}
255 311
312/*
313 * rsv_is_empty() -- Check if the reservation window is allocated.
314 * @rsv: given reservation window to check
315 *
316 * returns 1 if the end block is EXT3_RESERVE_WINDOW_NOT_ALLOCATED.
317 */
256static inline int rsv_is_empty(struct ext3_reserve_window *rsv) 318static inline int rsv_is_empty(struct ext3_reserve_window *rsv)
257{ 319{
258 /* a valid reservation end block could not be 0 */ 320 /* a valid reservation end block could not be 0 */
259 return (rsv->_rsv_end == EXT3_RESERVE_WINDOW_NOT_ALLOCATED); 321 return rsv->_rsv_end == EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
260} 322}
323
324/**
325 * ext3_init_block_alloc_info()
326 * @inode: file inode structure
327 *
328 * Allocate and initialize the reservation window structure, and
329 * link the window to the ext3 inode structure at last
330 *
331 * The reservation window structure is only dynamically allocated
332 * and linked to ext3 inode the first time the open file
333 * needs a new block. So, before every ext3_new_block(s) call, for
334 * regular files, we should check whether the reservation window
335 * structure exists or not. In the latter case, this function is called.
336 * Fail to do so will result in block reservation being turned off for that
337 * open file.
338 *
339 * This function is called from ext3_get_blocks_handle(), also called
340 * when setting the reservation window size through ioctl before the file
341 * is open for write (needs block allocation).
342 *
343 * Needs truncate_mutex protection prior to call this function.
344 */
261void ext3_init_block_alloc_info(struct inode *inode) 345void ext3_init_block_alloc_info(struct inode *inode)
262{ 346{
263 struct ext3_inode_info *ei = EXT3_I(inode); 347 struct ext3_inode_info *ei = EXT3_I(inode);
@@ -271,7 +355,7 @@ void ext3_init_block_alloc_info(struct inode *inode)
271 rsv->rsv_start = EXT3_RESERVE_WINDOW_NOT_ALLOCATED; 355 rsv->rsv_start = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
272 rsv->rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED; 356 rsv->rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
273 357
274 /* 358 /*
275 * if filesystem is mounted with NORESERVATION, the goal 359 * if filesystem is mounted with NORESERVATION, the goal
276 * reservation window size is set to zero to indicate 360 * reservation window size is set to zero to indicate
277 * block reservation is off 361 * block reservation is off
@@ -287,6 +371,19 @@ void ext3_init_block_alloc_info(struct inode *inode)
287 ei->i_block_alloc_info = block_i; 371 ei->i_block_alloc_info = block_i;
288} 372}
289 373
374/**
375 * ext3_discard_reservation()
376 * @inode: inode
377 *
378 * Discard(free) block reservation window on last file close, or truncate
379 * or at last iput().
380 *
381 * It is being called in three cases:
382 * ext3_release_file(): last writer close the file
383 * ext3_clear_inode(): last iput(), when nobody link to this file.
384 * ext3_truncate(): when the block indirect map is about to change.
385 *
386 */
290void ext3_discard_reservation(struct inode *inode) 387void ext3_discard_reservation(struct inode *inode)
291{ 388{
292 struct ext3_inode_info *ei = EXT3_I(inode); 389 struct ext3_inode_info *ei = EXT3_I(inode);
@@ -306,7 +403,14 @@ void ext3_discard_reservation(struct inode *inode)
306 } 403 }
307} 404}
308 405
309/* Free given blocks, update quota and i_blocks field */ 406/**
407 * ext3_free_blocks_sb() -- Free given blocks and update quota
408 * @handle: handle to this transaction
409 * @sb: super block
410 * @block: start physcial block to free
411 * @count: number of blocks to free
412 * @pdquot_freed_blocks: pointer to quota
413 */
310void ext3_free_blocks_sb(handle_t *handle, struct super_block *sb, 414void ext3_free_blocks_sb(handle_t *handle, struct super_block *sb,
311 ext3_fsblk_t block, unsigned long count, 415 ext3_fsblk_t block, unsigned long count,
312 unsigned long *pdquot_freed_blocks) 416 unsigned long *pdquot_freed_blocks)
@@ -419,8 +523,8 @@ do_more:
419 } 523 }
420 /* @@@ This prevents newly-allocated data from being 524 /* @@@ This prevents newly-allocated data from being
421 * freed and then reallocated within the same 525 * freed and then reallocated within the same
422 * transaction. 526 * transaction.
423 * 527 *
424 * Ideally we would want to allow that to happen, but to 528 * Ideally we would want to allow that to happen, but to
425 * do so requires making journal_forget() capable of 529 * do so requires making journal_forget() capable of
426 * revoking the queued write of a data block, which 530 * revoking the queued write of a data block, which
@@ -433,7 +537,7 @@ do_more:
433 * safe not to set the allocation bit in the committed 537 * safe not to set the allocation bit in the committed
434 * bitmap, because we know that there is no outstanding 538 * bitmap, because we know that there is no outstanding
435 * activity on the buffer any more and so it is safe to 539 * activity on the buffer any more and so it is safe to
436 * reallocate it. 540 * reallocate it.
437 */ 541 */
438 BUFFER_TRACE(bitmap_bh, "set in b_committed_data"); 542 BUFFER_TRACE(bitmap_bh, "set in b_committed_data");
439 J_ASSERT_BH(bitmap_bh, 543 J_ASSERT_BH(bitmap_bh,
@@ -490,7 +594,13 @@ error_return:
490 return; 594 return;
491} 595}
492 596
493/* Free given blocks, update quota and i_blocks field */ 597/**
598 * ext3_free_blocks() -- Free given blocks and update quota
599 * @handle: handle for this transaction
600 * @inode: inode
601 * @block: start physical block to free
602 * @count: number of blocks to count
603 */
494void ext3_free_blocks(handle_t *handle, struct inode *inode, 604void ext3_free_blocks(handle_t *handle, struct inode *inode,
495 ext3_fsblk_t block, unsigned long count) 605 ext3_fsblk_t block, unsigned long count)
496{ 606{
@@ -508,7 +618,11 @@ void ext3_free_blocks(handle_t *handle, struct inode *inode,
508 return; 618 return;
509} 619}
510 620
511/* 621/**
622 * ext3_test_allocatable()
623 * @nr: given allocation block group
624 * @bh: bufferhead contains the bitmap of the given block group
625 *
512 * For ext3 allocations, we must not reuse any blocks which are 626 * For ext3 allocations, we must not reuse any blocks which are
513 * allocated in the bitmap buffer's "last committed data" copy. This 627 * allocated in the bitmap buffer's "last committed data" copy. This
514 * prevents deletes from freeing up the page for reuse until we have 628 * prevents deletes from freeing up the page for reuse until we have
@@ -518,7 +632,7 @@ void ext3_free_blocks(handle_t *handle, struct inode *inode,
518 * data would allow the old block to be overwritten before the 632 * data would allow the old block to be overwritten before the
519 * transaction committed (because we force data to disk before commit). 633 * transaction committed (because we force data to disk before commit).
520 * This would lead to corruption if we crashed between overwriting the 634 * This would lead to corruption if we crashed between overwriting the
521 * data and committing the delete. 635 * data and committing the delete.
522 * 636 *
523 * @@@ We may want to make this allocation behaviour conditional on 637 * @@@ We may want to make this allocation behaviour conditional on
524 * data-writes at some point, and disable it for metadata allocations or 638 * data-writes at some point, and disable it for metadata allocations or
@@ -541,6 +655,16 @@ static int ext3_test_allocatable(ext3_grpblk_t nr, struct buffer_head *bh)
541 return ret; 655 return ret;
542} 656}
543 657
658/**
659 * bitmap_search_next_usable_block()
660 * @start: the starting block (group relative) of the search
661 * @bh: bufferhead contains the block group bitmap
662 * @maxblocks: the ending block (group relative) of the reservation
663 *
664 * The bitmap search --- search forward alternately through the actual
665 * bitmap on disk and the last-committed copy in journal, until we find a
666 * bit free in both bitmaps.
667 */
544static ext3_grpblk_t 668static ext3_grpblk_t
545bitmap_search_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh, 669bitmap_search_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh,
546 ext3_grpblk_t maxblocks) 670 ext3_grpblk_t maxblocks)
@@ -548,11 +672,6 @@ bitmap_search_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh,
548 ext3_grpblk_t next; 672 ext3_grpblk_t next;
549 struct journal_head *jh = bh2jh(bh); 673 struct journal_head *jh = bh2jh(bh);
550 674
551 /*
552 * The bitmap search --- search forward alternately through the actual
553 * bitmap and the last-committed copy until we find a bit free in
554 * both
555 */
556 while (start < maxblocks) { 675 while (start < maxblocks) {
557 next = ext3_find_next_zero_bit(bh->b_data, maxblocks, start); 676 next = ext3_find_next_zero_bit(bh->b_data, maxblocks, start);
558 if (next >= maxblocks) 677 if (next >= maxblocks)
@@ -562,14 +681,20 @@ bitmap_search_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh,
562 jbd_lock_bh_state(bh); 681 jbd_lock_bh_state(bh);
563 if (jh->b_committed_data) 682 if (jh->b_committed_data)
564 start = ext3_find_next_zero_bit(jh->b_committed_data, 683 start = ext3_find_next_zero_bit(jh->b_committed_data,
565 maxblocks, next); 684 maxblocks, next);
566 jbd_unlock_bh_state(bh); 685 jbd_unlock_bh_state(bh);
567 } 686 }
568 return -1; 687 return -1;
569} 688}
570 689
571/* 690/**
572 * Find an allocatable block in a bitmap. We honour both the bitmap and 691 * find_next_usable_block()
692 * @start: the starting block (group relative) to find next
693 * allocatable block in bitmap.
694 * @bh: bufferhead contains the block group bitmap
695 * @maxblocks: the ending block (group relative) for the search
696 *
697 * Find an allocatable block in a bitmap. We honor both the bitmap and
573 * its last-committed copy (if that exists), and perform the "most 698 * its last-committed copy (if that exists), and perform the "most
574 * appropriate allocation" algorithm of looking for a free block near 699 * appropriate allocation" algorithm of looking for a free block near
575 * the initial goal; then for a free byte somewhere in the bitmap; then 700 * the initial goal; then for a free byte somewhere in the bitmap; then
@@ -584,7 +709,7 @@ find_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh,
584 709
585 if (start > 0) { 710 if (start > 0) {
586 /* 711 /*
587 * The goal was occupied; search forward for a free 712 * The goal was occupied; search forward for a free
588 * block within the next XX blocks. 713 * block within the next XX blocks.
589 * 714 *
590 * end_goal is more or less random, but it has to be 715 * end_goal is more or less random, but it has to be
@@ -620,7 +745,11 @@ find_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh,
620 return here; 745 return here;
621} 746}
622 747
623/* 748/**
749 * claim_block()
750 * @block: the free block (group relative) to allocate
751 * @bh: the bufferhead containts the block group bitmap
752 *
624 * We think we can allocate this block in this bitmap. Try to set the bit. 753 * We think we can allocate this block in this bitmap. Try to set the bit.
625 * If that succeeds then check that nobody has allocated and then freed the 754 * If that succeeds then check that nobody has allocated and then freed the
626 * block since we saw that is was not marked in b_committed_data. If it _was_ 755 * block since we saw that is was not marked in b_committed_data. If it _was_
@@ -646,7 +775,26 @@ claim_block(spinlock_t *lock, ext3_grpblk_t block, struct buffer_head *bh)
646 return ret; 775 return ret;
647} 776}
648 777
649/* 778/**
779 * ext3_try_to_allocate()
780 * @sb: superblock
781 * @handle: handle to this transaction
782 * @group: given allocation block group
783 * @bitmap_bh: bufferhead holds the block bitmap
784 * @grp_goal: given target block within the group
785 * @count: target number of blocks to allocate
786 * @my_rsv: reservation window
787 *
788 * Attempt to allocate blocks within a give range. Set the range of allocation
789 * first, then find the first free bit(s) from the bitmap (within the range),
790 * and at last, allocate the blocks by claiming the found free bit as allocated.
791 *
792 * To set the range of this allocation:
793 * if there is a reservation window, only try to allocate block(s) from the
794 * file's own reservation window;
795 * Otherwise, the allocation range starts from the give goal block, ends at
796 * the block group's last block.
797 *
650 * If we failed to allocate the desired block then we may end up crossing to a 798 * If we failed to allocate the desired block then we may end up crossing to a
651 * new bitmap. In that case we must release write access to the old one via 799 * new bitmap. In that case we must release write access to the old one via
652 * ext3_journal_release_buffer(), else we'll run out of credits. 800 * ext3_journal_release_buffer(), else we'll run out of credits.
@@ -703,7 +851,8 @@ repeat:
703 } 851 }
704 start = grp_goal; 852 start = grp_goal;
705 853
706 if (!claim_block(sb_bgl_lock(EXT3_SB(sb), group), grp_goal, bitmap_bh)) { 854 if (!claim_block(sb_bgl_lock(EXT3_SB(sb), group),
855 grp_goal, bitmap_bh)) {
707 /* 856 /*
708 * The block was allocated by another thread, or it was 857 * The block was allocated by another thread, or it was
709 * allocated and then freed by another thread 858 * allocated and then freed by another thread
@@ -718,7 +867,8 @@ repeat:
718 grp_goal++; 867 grp_goal++;
719 while (num < *count && grp_goal < end 868 while (num < *count && grp_goal < end
720 && ext3_test_allocatable(grp_goal, bitmap_bh) 869 && ext3_test_allocatable(grp_goal, bitmap_bh)
721 && claim_block(sb_bgl_lock(EXT3_SB(sb), group), grp_goal, bitmap_bh)) { 870 && claim_block(sb_bgl_lock(EXT3_SB(sb), group),
871 grp_goal, bitmap_bh)) {
722 num++; 872 num++;
723 grp_goal++; 873 grp_goal++;
724 } 874 }
@@ -730,12 +880,12 @@ fail_access:
730} 880}
731 881
732/** 882/**
733 * find_next_reservable_window(): 883 * find_next_reservable_window():
734 * find a reservable space within the given range. 884 * find a reservable space within the given range.
735 * It does not allocate the reservation window for now: 885 * It does not allocate the reservation window for now:
736 * alloc_new_reservation() will do the work later. 886 * alloc_new_reservation() will do the work later.
737 * 887 *
738 * @search_head: the head of the searching list; 888 * @search_head: the head of the searching list;
739 * This is not necessarily the list head of the whole filesystem 889 * This is not necessarily the list head of the whole filesystem
740 * 890 *
741 * We have both head and start_block to assist the search 891 * We have both head and start_block to assist the search
@@ -743,12 +893,12 @@ fail_access:
743 * but we will shift to the place where start_block is, 893 * but we will shift to the place where start_block is,
744 * then start from there, when looking for a reservable space. 894 * then start from there, when looking for a reservable space.
745 * 895 *
746 * @size: the target new reservation window size 896 * @size: the target new reservation window size
747 * 897 *
748 * @group_first_block: the first block we consider to start 898 * @group_first_block: the first block we consider to start
749 * the real search from 899 * the real search from
750 * 900 *
751 * @last_block: 901 * @last_block:
752 * the maximum block number that our goal reservable space 902 * the maximum block number that our goal reservable space
753 * could start from. This is normally the last block in this 903 * could start from. This is normally the last block in this
754 * group. The search will end when we found the start of next 904 * group. The search will end when we found the start of next
@@ -756,10 +906,10 @@ fail_access:
756 * This could handle the cross boundary reservation window 906 * This could handle the cross boundary reservation window
757 * request. 907 * request.
758 * 908 *
759 * basically we search from the given range, rather than the whole 909 * basically we search from the given range, rather than the whole
760 * reservation double linked list, (start_block, last_block) 910 * reservation double linked list, (start_block, last_block)
761 * to find a free region that is of my size and has not 911 * to find a free region that is of my size and has not
762 * been reserved. 912 * been reserved.
763 * 913 *
764 */ 914 */
765static int find_next_reservable_window( 915static int find_next_reservable_window(
@@ -812,7 +962,7 @@ static int find_next_reservable_window(
812 /* 962 /*
813 * Found a reserveable space big enough. We could 963 * Found a reserveable space big enough. We could
814 * have a reservation across the group boundary here 964 * have a reservation across the group boundary here
815 */ 965 */
816 break; 966 break;
817 } 967 }
818 } 968 }
@@ -848,7 +998,7 @@ static int find_next_reservable_window(
848} 998}
849 999
850/** 1000/**
851 * alloc_new_reservation()--allocate a new reservation window 1001 * alloc_new_reservation()--allocate a new reservation window
852 * 1002 *
853 * To make a new reservation, we search part of the filesystem 1003 * To make a new reservation, we search part of the filesystem
854 * reservation list (the list that inside the group). We try to 1004 * reservation list (the list that inside the group). We try to
@@ -897,7 +1047,7 @@ static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv,
897 spinlock_t *rsv_lock = &EXT3_SB(sb)->s_rsv_window_lock; 1047 spinlock_t *rsv_lock = &EXT3_SB(sb)->s_rsv_window_lock;
898 1048
899 group_first_block = ext3_group_first_block_no(sb, group); 1049 group_first_block = ext3_group_first_block_no(sb, group);
900 group_end_block = group_first_block + EXT3_BLOCKS_PER_GROUP(sb) - 1; 1050 group_end_block = group_first_block + (EXT3_BLOCKS_PER_GROUP(sb) - 1);
901 1051
902 if (grp_goal < 0) 1052 if (grp_goal < 0)
903 start_block = group_first_block; 1053 start_block = group_first_block;
@@ -929,9 +1079,10 @@ static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv,
929 if ((my_rsv->rsv_alloc_hit > 1079 if ((my_rsv->rsv_alloc_hit >
930 (my_rsv->rsv_end - my_rsv->rsv_start + 1) / 2)) { 1080 (my_rsv->rsv_end - my_rsv->rsv_start + 1) / 2)) {
931 /* 1081 /*
932 * if we previously allocation hit ration is greater than half 1082 * if the previously allocation hit ratio is
933 * we double the size of reservation window next time 1083 * greater than 1/2, then we double the size of
934 * otherwise keep the same 1084 * the reservation window the next time,
1085 * otherwise we keep the same size window
935 */ 1086 */
936 size = size * 2; 1087 size = size * 2;
937 if (size > EXT3_MAX_RESERVE_BLOCKS) 1088 if (size > EXT3_MAX_RESERVE_BLOCKS)
@@ -1010,6 +1161,23 @@ retry:
1010 goto retry; 1161 goto retry;
1011} 1162}
1012 1163
1164/**
1165 * try_to_extend_reservation()
1166 * @my_rsv: given reservation window
1167 * @sb: super block
1168 * @size: the delta to extend
1169 *
1170 * Attempt to expand the reservation window large enough to have
1171 * required number of free blocks
1172 *
1173 * Since ext3_try_to_allocate() will always allocate blocks within
1174 * the reservation window range, if the window size is too small,
1175 * multiple blocks allocation has to stop at the end of the reservation
1176 * window. To make this more efficient, given the total number of
1177 * blocks needed and the current size of the window, we try to
1178 * expand the reservation window size if necessary on a best-effort
1179 * basis before ext3_new_blocks() tries to allocate blocks,
1180 */
1013static void try_to_extend_reservation(struct ext3_reserve_window_node *my_rsv, 1181static void try_to_extend_reservation(struct ext3_reserve_window_node *my_rsv,
1014 struct super_block *sb, int size) 1182 struct super_block *sb, int size)
1015{ 1183{
@@ -1035,7 +1203,17 @@ static void try_to_extend_reservation(struct ext3_reserve_window_node *my_rsv,
1035 spin_unlock(rsv_lock); 1203 spin_unlock(rsv_lock);
1036} 1204}
1037 1205
1038/* 1206/**
1207 * ext3_try_to_allocate_with_rsv()
1208 * @sb: superblock
1209 * @handle: handle to this transaction
1210 * @group: given allocation block group
1211 * @bitmap_bh: bufferhead holds the block bitmap
1212 * @grp_goal: given target block within the group
1213 * @count: target number of blocks to allocate
1214 * @my_rsv: reservation window
1215 * @errp: pointer to store the error code
1216 *
1039 * This is the main function used to allocate a new block and its reservation 1217 * This is the main function used to allocate a new block and its reservation
1040 * window. 1218 * window.
1041 * 1219 *
@@ -1051,9 +1229,7 @@ static void try_to_extend_reservation(struct ext3_reserve_window_node *my_rsv,
1051 * reservation), and there are lots of free blocks, but they are all 1229 * reservation), and there are lots of free blocks, but they are all
1052 * being reserved. 1230 * being reserved.
1053 * 1231 *
1054 * We use a sorted double linked list for the per-filesystem reservation list. 1232 * We use a red-black tree for the per-filesystem reservation list.
1055 * The insert, remove and find a free space(non-reserved) operations for the
1056 * sorted double linked list should be fast.
1057 * 1233 *
1058 */ 1234 */
1059static ext3_grpblk_t 1235static ext3_grpblk_t
@@ -1063,7 +1239,7 @@ ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
1063 struct ext3_reserve_window_node * my_rsv, 1239 struct ext3_reserve_window_node * my_rsv,
1064 unsigned long *count, int *errp) 1240 unsigned long *count, int *errp)
1065{ 1241{
1066 ext3_fsblk_t group_first_block; 1242 ext3_fsblk_t group_first_block, group_last_block;
1067 ext3_grpblk_t ret = 0; 1243 ext3_grpblk_t ret = 0;
1068 int fatal; 1244 int fatal;
1069 unsigned long num = *count; 1245 unsigned long num = *count;
@@ -1100,6 +1276,7 @@ ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
1100 * first block is the block number of the first block in this group 1276 * first block is the block number of the first block in this group
1101 */ 1277 */
1102 group_first_block = ext3_group_first_block_no(sb, group); 1278 group_first_block = ext3_group_first_block_no(sb, group);
1279 group_last_block = group_first_block + (EXT3_BLOCKS_PER_GROUP(sb) - 1);
1103 1280
1104 /* 1281 /*
1105 * Basically we will allocate a new block from inode's reservation 1282 * Basically we will allocate a new block from inode's reservation
@@ -1118,7 +1295,8 @@ ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
1118 */ 1295 */
1119 while (1) { 1296 while (1) {
1120 if (rsv_is_empty(&my_rsv->rsv_window) || (ret < 0) || 1297 if (rsv_is_empty(&my_rsv->rsv_window) || (ret < 0) ||
1121 !goal_in_my_reservation(&my_rsv->rsv_window, grp_goal, group, sb)) { 1298 !goal_in_my_reservation(&my_rsv->rsv_window,
1299 grp_goal, group, sb)) {
1122 if (my_rsv->rsv_goal_size < *count) 1300 if (my_rsv->rsv_goal_size < *count)
1123 my_rsv->rsv_goal_size = *count; 1301 my_rsv->rsv_goal_size = *count;
1124 ret = alloc_new_reservation(my_rsv, grp_goal, sb, 1302 ret = alloc_new_reservation(my_rsv, grp_goal, sb,
@@ -1126,17 +1304,21 @@ ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
1126 if (ret < 0) 1304 if (ret < 0)
1127 break; /* failed */ 1305 break; /* failed */
1128 1306
1129 if (!goal_in_my_reservation(&my_rsv->rsv_window, grp_goal, group, sb)) 1307 if (!goal_in_my_reservation(&my_rsv->rsv_window,
1308 grp_goal, group, sb))
1130 grp_goal = -1; 1309 grp_goal = -1;
1131 } else if (grp_goal > 0 && (my_rsv->rsv_end-grp_goal+1) < *count) 1310 } else if (grp_goal > 0 &&
1311 (my_rsv->rsv_end-grp_goal+1) < *count)
1132 try_to_extend_reservation(my_rsv, sb, 1312 try_to_extend_reservation(my_rsv, sb,
1133 *count-my_rsv->rsv_end + grp_goal - 1); 1313 *count-my_rsv->rsv_end + grp_goal - 1);
1134 1314
1135 if ((my_rsv->rsv_start >= group_first_block + EXT3_BLOCKS_PER_GROUP(sb)) 1315 if ((my_rsv->rsv_start > group_last_block) ||
1136 || (my_rsv->rsv_end < group_first_block)) 1316 (my_rsv->rsv_end < group_first_block)) {
1317 rsv_window_dump(&EXT3_SB(sb)->s_rsv_window_root, 1);
1137 BUG(); 1318 BUG();
1138 ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh, grp_goal, 1319 }
1139 &num, &my_rsv->rsv_window); 1320 ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh,
1321 grp_goal, &num, &my_rsv->rsv_window);
1140 if (ret >= 0) { 1322 if (ret >= 0) {
1141 my_rsv->rsv_alloc_hit += num; 1323 my_rsv->rsv_alloc_hit += num;
1142 *count = num; 1324 *count = num;
@@ -1161,6 +1343,12 @@ out:
1161 return ret; 1343 return ret;
1162} 1344}
1163 1345
1346/**
1347 * ext3_has_free_blocks()
1348 * @sbi: in-core super block structure.
1349 *
1350 * Check if filesystem has at least 1 free block available for allocation.
1351 */
1164static int ext3_has_free_blocks(struct ext3_sb_info *sbi) 1352static int ext3_has_free_blocks(struct ext3_sb_info *sbi)
1165{ 1353{
1166 ext3_fsblk_t free_blocks, root_blocks; 1354 ext3_fsblk_t free_blocks, root_blocks;
@@ -1175,11 +1363,17 @@ static int ext3_has_free_blocks(struct ext3_sb_info *sbi)
1175 return 1; 1363 return 1;
1176} 1364}
1177 1365
1178/* 1366/**
1367 * ext3_should_retry_alloc()
1368 * @sb: super block
1369 * @retries number of attemps has been made
1370 *
1179 * ext3_should_retry_alloc() is called when ENOSPC is returned, and if 1371 * ext3_should_retry_alloc() is called when ENOSPC is returned, and if
1180 * it is profitable to retry the operation, this function will wait 1372 * it is profitable to retry the operation, this function will wait
1181 * for the current or commiting transaction to complete, and then 1373 * for the current or commiting transaction to complete, and then
1182 * return TRUE. 1374 * return TRUE.
1375 *
1376 * if the total number of retries exceed three times, return FALSE.
1183 */ 1377 */
1184int ext3_should_retry_alloc(struct super_block *sb, int *retries) 1378int ext3_should_retry_alloc(struct super_block *sb, int *retries)
1185{ 1379{
@@ -1191,13 +1385,19 @@ int ext3_should_retry_alloc(struct super_block *sb, int *retries)
1191 return journal_force_commit_nested(EXT3_SB(sb)->s_journal); 1385 return journal_force_commit_nested(EXT3_SB(sb)->s_journal);
1192} 1386}
1193 1387
1194/* 1388/**
1195 * ext3_new_block uses a goal block to assist allocation. If the goal is 1389 * ext3_new_blocks() -- core block(s) allocation function
1196 * free, or there is a free block within 32 blocks of the goal, that block 1390 * @handle: handle to this transaction
1197 * is allocated. Otherwise a forward search is made for a free block; within 1391 * @inode: file inode
1198 * each block group the search first looks for an entire free byte in the block 1392 * @goal: given target block(filesystem wide)
1199 * bitmap, and then for any free bit if that fails. 1393 * @count: target number of blocks to allocate
1200 * This function also updates quota and i_blocks field. 1394 * @errp: error code
1395 *
1396 * ext3_new_blocks uses a goal block to assist allocation. It tries to
1397 * allocate block(s) from the block group contains the goal block first. If that
1398 * fails, it will try to allocate block(s) from other block groups without
1399 * any specific goal block.
1400 *
1201 */ 1401 */
1202ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode, 1402ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode,
1203 ext3_fsblk_t goal, unsigned long *count, int *errp) 1403 ext3_fsblk_t goal, unsigned long *count, int *errp)
@@ -1303,7 +1503,7 @@ retry_alloc:
1303 smp_rmb(); 1503 smp_rmb();
1304 1504
1305 /* 1505 /*
1306 * Now search the rest of the groups. We assume that 1506 * Now search the rest of the groups. We assume that
1307 * i and gdp correctly point to the last group visited. 1507 * i and gdp correctly point to the last group visited.
1308 */ 1508 */
1309 for (bgi = 0; bgi < ngroups; bgi++) { 1509 for (bgi = 0; bgi < ngroups; bgi++) {
@@ -1428,7 +1628,7 @@ allocated:
1428 1628
1429 spin_lock(sb_bgl_lock(sbi, group_no)); 1629 spin_lock(sb_bgl_lock(sbi, group_no));
1430 gdp->bg_free_blocks_count = 1630 gdp->bg_free_blocks_count =
1431 cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) - num); 1631 cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count)-num);
1432 spin_unlock(sb_bgl_lock(sbi, group_no)); 1632 spin_unlock(sb_bgl_lock(sbi, group_no));
1433 percpu_counter_mod(&sbi->s_freeblocks_counter, -num); 1633 percpu_counter_mod(&sbi->s_freeblocks_counter, -num);
1434 1634
@@ -1471,6 +1671,12 @@ ext3_fsblk_t ext3_new_block(handle_t *handle, struct inode *inode,
1471 return ext3_new_blocks(handle, inode, goal, &count, errp); 1671 return ext3_new_blocks(handle, inode, goal, &count, errp);
1472} 1672}
1473 1673
1674/**
1675 * ext3_count_free_blocks() -- count filesystem free blocks
1676 * @sb: superblock
1677 *
1678 * Adds up the number of free blocks from each block group.
1679 */
1474ext3_fsblk_t ext3_count_free_blocks(struct super_block *sb) 1680ext3_fsblk_t ext3_count_free_blocks(struct super_block *sb)
1475{ 1681{
1476 ext3_fsblk_t desc_count; 1682 ext3_fsblk_t desc_count;