aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author: Yan, Zheng <zheng.yan@oracle.com> 2010-05-16 10:49:59 -0400
committer: Chris Mason <chris.mason@oracle.com> 2010-05-25 10:34:53 -0400
commit: efa56464562991b8c24f965199888806bd8c4b38 (patch)
tree: e7c7e69e2931674ddf4f14ac08dfdf43b45de0f4 /fs
parent: 4a500fd178c89b96fa166a2d9e7855df33429841 (diff)
Btrfs: Pre-allocate space for data relocation
Pre-allocate space for data relocation. This can detect an ENOSPC condition caused by fragmentation of free space.
Signed-off-by: Yan Zheng <zheng.yan@oracle.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs')
-rw-r--r-- fs/btrfs/ctree.h 3
-rw-r--r-- fs/btrfs/inode.c 44
-rw-r--r-- fs/btrfs/relocation.c 90
3 files changed, 92 insertions, 45 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index fc324f9fcb42..65530837d04b 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2420,6 +2420,9 @@ int btrfs_cont_expand(struct inode *inode, loff_t size);
2420int btrfs_invalidate_inodes(struct btrfs_root *root); 2420int btrfs_invalidate_inodes(struct btrfs_root *root);
2421void btrfs_add_delayed_iput(struct inode *inode); 2421void btrfs_add_delayed_iput(struct inode *inode);
2422void btrfs_run_delayed_iputs(struct btrfs_root *root); 2422void btrfs_run_delayed_iputs(struct btrfs_root *root);
2423int btrfs_prealloc_file_range(struct inode *inode, int mode,
2424 u64 start, u64 num_bytes, u64 min_size,
2425 loff_t actual_len, u64 *alloc_hint);
2423extern const struct dentry_operations btrfs_dentry_operations; 2426extern const struct dentry_operations btrfs_dentry_operations;
2424 2427
2425/* ioctl.c */ 2428/* ioctl.c */
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index bef69bedf3cf..460dd512eebd 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1175,6 +1175,13 @@ out_check:
1175 num_bytes, num_bytes, type); 1175 num_bytes, num_bytes, type);
1176 BUG_ON(ret); 1176 BUG_ON(ret);
1177 1177
1178 if (root->root_key.objectid ==
1179 BTRFS_DATA_RELOC_TREE_OBJECTID) {
1180 ret = btrfs_reloc_clone_csums(inode, cur_offset,
1181 num_bytes);
1182 BUG_ON(ret);
1183 }
1184
1178 extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, 1185 extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
1179 cur_offset, cur_offset + num_bytes - 1, 1186 cur_offset, cur_offset + num_bytes - 1,
1180 locked_page, EXTENT_CLEAR_UNLOCK_PAGE | 1187 locked_page, EXTENT_CLEAR_UNLOCK_PAGE |
@@ -6080,16 +6087,15 @@ out_unlock:
6080 return err; 6087 return err;
6081} 6088}
6082 6089
6083static int prealloc_file_range(struct inode *inode, u64 start, u64 end, 6090int btrfs_prealloc_file_range(struct inode *inode, int mode,
6084 u64 alloc_hint, int mode, loff_t actual_len) 6091 u64 start, u64 num_bytes, u64 min_size,
6092 loff_t actual_len, u64 *alloc_hint)
6085{ 6093{
6086 struct btrfs_trans_handle *trans; 6094 struct btrfs_trans_handle *trans;
6087 struct btrfs_root *root = BTRFS_I(inode)->root; 6095 struct btrfs_root *root = BTRFS_I(inode)->root;
6088 struct btrfs_key ins; 6096 struct btrfs_key ins;
6089 u64 cur_offset = start; 6097 u64 cur_offset = start;
6090 u64 num_bytes = end - start;
6091 int ret = 0; 6098 int ret = 0;
6092 u64 i_size;
6093 6099
6094 while (num_bytes > 0) { 6100 while (num_bytes > 0) {
6095 trans = btrfs_start_transaction(root, 3); 6101 trans = btrfs_start_transaction(root, 3);
@@ -6098,9 +6104,8 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end,
6098 break; 6104 break;
6099 } 6105 }
6100 6106
6101 ret = btrfs_reserve_extent(trans, root, num_bytes, 6107 ret = btrfs_reserve_extent(trans, root, num_bytes, min_size,
6102 root->sectorsize, 0, alloc_hint, 6108 0, *alloc_hint, (u64)-1, &ins, 1);
6103 (u64)-1, &ins, 1);
6104 if (ret) { 6109 if (ret) {
6105 btrfs_end_transaction(trans, root); 6110 btrfs_end_transaction(trans, root);
6106 break; 6111 break;
@@ -6117,20 +6122,19 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end,
6117 6122
6118 num_bytes -= ins.offset; 6123 num_bytes -= ins.offset;
6119 cur_offset += ins.offset; 6124 cur_offset += ins.offset;
6120 alloc_hint = ins.objectid + ins.offset; 6125 *alloc_hint = ins.objectid + ins.offset;
6121 6126
6122 inode->i_ctime = CURRENT_TIME; 6127 inode->i_ctime = CURRENT_TIME;
6123 BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC; 6128 BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC;
6124 if (!(mode & FALLOC_FL_KEEP_SIZE) && 6129 if (!(mode & FALLOC_FL_KEEP_SIZE) &&
6125 (actual_len > inode->i_size) && 6130 (actual_len > inode->i_size) &&
6126 (cur_offset > inode->i_size)) { 6131 (cur_offset > inode->i_size)) {
6127
6128 if (cur_offset > actual_len) 6132 if (cur_offset > actual_len)
6129 i_size = actual_len; 6133 i_size_write(inode, actual_len);
6130 else 6134 else
6131 i_size = cur_offset; 6135 i_size_write(inode, cur_offset);
6132 i_size_write(inode, i_size); 6136 i_size_write(inode, cur_offset);
6133 btrfs_ordered_update_i_size(inode, i_size, NULL); 6137 btrfs_ordered_update_i_size(inode, cur_offset, NULL);
6134 } 6138 }
6135 6139
6136 ret = btrfs_update_inode(trans, root, inode); 6140 ret = btrfs_update_inode(trans, root, inode);
@@ -6216,16 +6220,16 @@ static long btrfs_fallocate(struct inode *inode, int mode,
6216 if (em->block_start == EXTENT_MAP_HOLE || 6220 if (em->block_start == EXTENT_MAP_HOLE ||
6217 (cur_offset >= inode->i_size && 6221 (cur_offset >= inode->i_size &&
6218 !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { 6222 !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
6219 ret = prealloc_file_range(inode, 6223 ret = btrfs_prealloc_file_range(inode, 0, cur_offset,
6220 cur_offset, last_byte, 6224 last_byte - cur_offset,
6221 alloc_hint, mode, offset+len); 6225 1 << inode->i_blkbits,
6226 offset + len,
6227 &alloc_hint);
6222 if (ret < 0) { 6228 if (ret < 0) {
6223 free_extent_map(em); 6229 free_extent_map(em);
6224 break; 6230 break;
6225 } 6231 }
6226 } 6232 }
6227 if (em->block_start <= EXTENT_MAP_LAST_BYTE)
6228 alloc_hint = em->block_start;
6229 free_extent_map(em); 6233 free_extent_map(em);
6230 6234
6231 cur_offset = last_byte; 6235 cur_offset = last_byte;
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 145a468c300d..3943526b7348 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -2546,6 +2546,50 @@ out:
2546} 2546}
2547 2547
2548static noinline_for_stack 2548static noinline_for_stack
2549int prealloc_file_extent_cluster(struct inode *inode,
2550 struct file_extent_cluster *cluster)
2551{
2552 u64 alloc_hint = 0;
2553 u64 start;
2554 u64 end;
2555 u64 offset = BTRFS_I(inode)->index_cnt;
2556 u64 num_bytes;
2557 int nr = 0;
2558 int ret = 0;
2559
2560 BUG_ON(cluster->start != cluster->boundary[0]);
2561 mutex_lock(&inode->i_mutex);
2562
2563 ret = btrfs_check_data_free_space(inode, cluster->end +
2564 1 - cluster->start);
2565 if (ret)
2566 goto out;
2567
2568 while (nr < cluster->nr) {
2569 start = cluster->boundary[nr] - offset;
2570 if (nr + 1 < cluster->nr)
2571 end = cluster->boundary[nr + 1] - 1 - offset;
2572 else
2573 end = cluster->end - offset;
2574
2575 lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
2576 num_bytes = end + 1 - start;
2577 ret = btrfs_prealloc_file_range(inode, 0, start,
2578 num_bytes, num_bytes,
2579 end + 1, &alloc_hint);
2580 unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
2581 if (ret)
2582 break;
2583 nr++;
2584 }
2585 btrfs_free_reserved_data_space(inode, cluster->end +
2586 1 - cluster->start);
2587out:
2588 mutex_unlock(&inode->i_mutex);
2589 return ret;
2590}
2591
2592static noinline_for_stack
2549int setup_extent_mapping(struct inode *inode, u64 start, u64 end, 2593int setup_extent_mapping(struct inode *inode, u64 start, u64 end,
2550 u64 block_start) 2594 u64 block_start)
2551{ 2595{
@@ -2588,7 +2632,6 @@ static int relocate_file_extent_cluster(struct inode *inode,
2588 u64 offset = BTRFS_I(inode)->index_cnt; 2632 u64 offset = BTRFS_I(inode)->index_cnt;
2589 unsigned long index; 2633 unsigned long index;
2590 unsigned long last_index; 2634 unsigned long last_index;
2591 unsigned int dirty_page = 0;
2592 struct page *page; 2635 struct page *page;
2593 struct file_ra_state *ra; 2636 struct file_ra_state *ra;
2594 int nr = 0; 2637 int nr = 0;
@@ -2601,21 +2644,24 @@ static int relocate_file_extent_cluster(struct inode *inode,
2601 if (!ra) 2644 if (!ra)
2602 return -ENOMEM; 2645 return -ENOMEM;
2603 2646
2604 index = (cluster->start - offset) >> PAGE_CACHE_SHIFT; 2647 ret = prealloc_file_extent_cluster(inode, cluster);
2605 last_index = (cluster->end - offset) >> PAGE_CACHE_SHIFT; 2648 if (ret)
2649 goto out;
2606 2650
2607 mutex_lock(&inode->i_mutex); 2651 file_ra_state_init(ra, inode->i_mapping);
2608 2652
2609 i_size_write(inode, cluster->end + 1 - offset);
2610 ret = setup_extent_mapping(inode, cluster->start - offset, 2653 ret = setup_extent_mapping(inode, cluster->start - offset,
2611 cluster->end - offset, cluster->start); 2654 cluster->end - offset, cluster->start);
2612 if (ret) 2655 if (ret)
2613 goto out_unlock; 2656 goto out;
2614
2615 file_ra_state_init(ra, inode->i_mapping);
2616 2657
2617 WARN_ON(cluster->start != cluster->boundary[0]); 2658 index = (cluster->start - offset) >> PAGE_CACHE_SHIFT;
2659 last_index = (cluster->end - offset) >> PAGE_CACHE_SHIFT;
2618 while (index <= last_index) { 2660 while (index <= last_index) {
2661 ret = btrfs_delalloc_reserve_metadata(inode, PAGE_CACHE_SIZE);
2662 if (ret)
2663 goto out;
2664
2619 page = find_lock_page(inode->i_mapping, index); 2665 page = find_lock_page(inode->i_mapping, index);
2620 if (!page) { 2666 if (!page) {
2621 page_cache_sync_readahead(inode->i_mapping, 2667 page_cache_sync_readahead(inode->i_mapping,
@@ -2623,8 +2669,10 @@ static int relocate_file_extent_cluster(struct inode *inode,
2623 last_index + 1 - index); 2669 last_index + 1 - index);
2624 page = grab_cache_page(inode->i_mapping, index); 2670 page = grab_cache_page(inode->i_mapping, index);
2625 if (!page) { 2671 if (!page) {
2672 btrfs_delalloc_release_metadata(inode,
2673 PAGE_CACHE_SIZE);
2626 ret = -ENOMEM; 2674 ret = -ENOMEM;
2627 goto out_unlock; 2675 goto out;
2628 } 2676 }
2629 } 2677 }
2630 2678
@@ -2640,8 +2688,10 @@ static int relocate_file_extent_cluster(struct inode *inode,
2640 if (!PageUptodate(page)) { 2688 if (!PageUptodate(page)) {
2641 unlock_page(page); 2689 unlock_page(page);
2642 page_cache_release(page); 2690 page_cache_release(page);
2691 btrfs_delalloc_release_metadata(inode,
2692 PAGE_CACHE_SIZE);
2643 ret = -EIO; 2693 ret = -EIO;
2644 goto out_unlock; 2694 goto out;
2645 } 2695 }
2646 } 2696 }
2647 2697
@@ -2660,10 +2710,9 @@ static int relocate_file_extent_cluster(struct inode *inode,
2660 EXTENT_BOUNDARY, GFP_NOFS); 2710 EXTENT_BOUNDARY, GFP_NOFS);
2661 nr++; 2711 nr++;
2662 } 2712 }
2663 btrfs_set_extent_delalloc(inode, page_start, page_end, NULL);
2664 2713
2714 btrfs_set_extent_delalloc(inode, page_start, page_end, NULL);
2665 set_page_dirty(page); 2715 set_page_dirty(page);
2666 dirty_page++;
2667 2716
2668 unlock_extent(&BTRFS_I(inode)->io_tree, 2717 unlock_extent(&BTRFS_I(inode)->io_tree,
2669 page_start, page_end, GFP_NOFS); 2718 page_start, page_end, GFP_NOFS);
@@ -2671,20 +2720,11 @@ static int relocate_file_extent_cluster(struct inode *inode,
2671 page_cache_release(page); 2720 page_cache_release(page);
2672 2721
2673 index++; 2722 index++;
2674 if (nr < cluster->nr && 2723 balance_dirty_pages_ratelimited(inode->i_mapping);
2675 page_end + 1 + offset == cluster->boundary[nr]) { 2724 btrfs_throttle(BTRFS_I(inode)->root);
2676 balance_dirty_pages_ratelimited_nr(inode->i_mapping,
2677 dirty_page);
2678 dirty_page = 0;
2679 }
2680 }
2681 if (dirty_page) {
2682 balance_dirty_pages_ratelimited_nr(inode->i_mapping,
2683 dirty_page);
2684 } 2725 }
2685 WARN_ON(nr != cluster->nr); 2726 WARN_ON(nr != cluster->nr);
2686out_unlock: 2727out:
2687 mutex_unlock(&inode->i_mutex);
2688 kfree(ra); 2728 kfree(ra);
2689 return ret; 2729 return ret;
2690} 2730}