aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- fs/btrfs/ctree.h 7
-rw-r--r-- fs/btrfs/disk-io.c 3
-rw-r--r-- fs/btrfs/extent-tree.c 39
-rw-r--r-- fs/btrfs/ioctl.c 21
-rw-r--r-- fs/btrfs/ordered-data.c 56
-rw-r--r-- fs/btrfs/ordered-data.h 7
6 files changed, 113 insertions, 20 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 8ecac2e77a43..6675e916ebcd 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -539,6 +539,13 @@ struct btrfs_fs_info {
539 atomic_t nr_async_submits; 539 atomic_t nr_async_submits;
540 540
541 /* 541 /*
542 * this is used by the balancing code to wait for all the pending
543 * ordered extents
544 */
545 spinlock_t ordered_extent_lock;
546 struct list_head ordered_extents;
547
548 /*
542 * there is a pool of worker threads for checksumming during writes 549 * there is a pool of worker threads for checksumming during writes
543 * and a pool for checksumming after reads. This is because readers 550 * and a pool for checksumming after reads. This is because readers
544 * can run with FS locks held, and the writers may be waiting for 551 * can run with FS locks held, and the writers may be waiting for
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 7ce3f83c5dd6..ec01062eb41d 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1252,6 +1252,9 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1252 fs_info->btree_inode->i_nlink = 1; 1252 fs_info->btree_inode->i_nlink = 1;
1253 fs_info->thread_pool_size = min(num_online_cpus() + 2, 8); 1253 fs_info->thread_pool_size = min(num_online_cpus() + 2, 8);
1254 1254
1255 INIT_LIST_HEAD(&fs_info->ordered_extents);
1256 spin_lock_init(&fs_info->ordered_extent_lock);
1257
1255 sb->s_blocksize = 4096; 1258 sb->s_blocksize = 4096;
1256 sb->s_blocksize_bits = blksize_bits(4096); 1259 sb->s_blocksize_bits = blksize_bits(4096);
1257 1260
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index febc6295c7a9..f92b297e7da5 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2640,6 +2640,7 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start,
2640 struct file_ra_state *ra; 2640 struct file_ra_state *ra;
2641 unsigned long total_read = 0; 2641 unsigned long total_read = 0;
2642 unsigned long ra_pages; 2642 unsigned long ra_pages;
2643 struct btrfs_ordered_extent *ordered;
2643 struct btrfs_trans_handle *trans; 2644 struct btrfs_trans_handle *trans;
2644 2645
2645 ra = kzalloc(sizeof(*ra), GFP_NOFS); 2646 ra = kzalloc(sizeof(*ra), GFP_NOFS);
@@ -2658,9 +2659,9 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start,
2658 calc_ra(i, last_index, ra_pages)); 2659 calc_ra(i, last_index, ra_pages));
2659 } 2660 }
2660 total_read++; 2661 total_read++;
2661 if (((u64)i << PAGE_CACHE_SHIFT) > inode->i_size) 2662again:
2663 if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode))
2662 goto truncate_racing; 2664 goto truncate_racing;
2663
2664 page = grab_cache_page(inode->i_mapping, i); 2665 page = grab_cache_page(inode->i_mapping, i);
2665 if (!page) { 2666 if (!page) {
2666 goto out_unlock; 2667 goto out_unlock;
@@ -2674,18 +2675,24 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start,
2674 goto out_unlock; 2675 goto out_unlock;
2675 } 2676 }
2676 } 2677 }
2677#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
2678 ClearPageDirty(page);
2679#else
2680 cancel_dirty_page(page, PAGE_CACHE_SIZE);
2681#endif
2682 wait_on_page_writeback(page); 2678 wait_on_page_writeback(page);
2683 set_page_extent_mapped(page); 2679
2684 page_start = (u64)page->index << PAGE_CACHE_SHIFT; 2680 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
2685 page_end = page_start + PAGE_CACHE_SIZE - 1; 2681 page_end = page_start + PAGE_CACHE_SIZE - 1;
2686
2687 lock_extent(io_tree, page_start, page_end, GFP_NOFS); 2682 lock_extent(io_tree, page_start, page_end, GFP_NOFS);
2688 2683
2684 ordered = btrfs_lookup_ordered_extent(inode, page_start);
2685 if (ordered) {
2686 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
2687 unlock_page(page);
2688 page_cache_release(page);
2689 btrfs_start_ordered_extent(inode, ordered, 1);
2690 btrfs_put_ordered_extent(ordered);
2691 goto again;
2692 }
2693 set_page_extent_mapped(page);
2694
2695
2689 set_extent_delalloc(io_tree, page_start, 2696 set_extent_delalloc(io_tree, page_start,
2690 page_end, GFP_NOFS); 2697 page_end, GFP_NOFS);
2691 set_page_dirty(page); 2698 set_page_dirty(page);
@@ -2694,10 +2701,18 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start,
2694 unlock_page(page); 2701 unlock_page(page);
2695 page_cache_release(page); 2702 page_cache_release(page);
2696 } 2703 }
2697 balance_dirty_pages_ratelimited_nr(inode->i_mapping,
2698 total_read);
2699 2704
2700out_unlock: 2705out_unlock:
2706 /* we have to start the IO in order to get the ordered extents
2707 * instantiated. This allows the relocation code to wait
2708 * for all the ordered extents to hit the disk.
2709 *
2710 * Otherwise, it would constantly loop over the same extents
2711 * because the old ones don't get deleted until the IO is
2712 * started
2713 */
2714 btrfs_fdatawrite_range(inode->i_mapping, start, start + len - 1,
2715 WB_SYNC_NONE);
2701 kfree(ra); 2716 kfree(ra);
2702 trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1); 2717 trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1);
2703 if (trans) { 2718 if (trans) {
@@ -3238,6 +3253,8 @@ next:
3238 3253
3239 btrfs_clean_old_snapshots(tree_root); 3254 btrfs_clean_old_snapshots(tree_root);
3240 3255
3256 btrfs_wait_ordered_extents(tree_root);
3257
3241 trans = btrfs_start_transaction(tree_root, 1); 3258 trans = btrfs_start_transaction(tree_root, 1);
3242 btrfs_commit_transaction(trans, tree_root); 3259 btrfs_commit_transaction(trans, tree_root);
3243 mutex_lock(&root->fs_info->alloc_mutex); 3260 mutex_lock(&root->fs_info->alloc_mutex);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 83f17a5cbd6a..a61f2e7e2db5 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -213,6 +213,7 @@ int btrfs_defrag_file(struct file *file)
213 struct inode *inode = fdentry(file)->d_inode; 213 struct inode *inode = fdentry(file)->d_inode;
214 struct btrfs_root *root = BTRFS_I(inode)->root; 214 struct btrfs_root *root = BTRFS_I(inode)->root;
215 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 215 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
216 struct btrfs_ordered_extent *ordered;
216 struct page *page; 217 struct page *page;
217 unsigned long last_index; 218 unsigned long last_index;
218 unsigned long ra_pages = root->fs_info->bdi.ra_pages; 219 unsigned long ra_pages = root->fs_info->bdi.ra_pages;
@@ -234,6 +235,7 @@ int btrfs_defrag_file(struct file *file)
234 min(last_index, i + ra_pages - 1)); 235 min(last_index, i + ra_pages - 1));
235 } 236 }
236 total_read++; 237 total_read++;
238again:
237 page = grab_cache_page(inode->i_mapping, i); 239 page = grab_cache_page(inode->i_mapping, i);
238 if (!page) 240 if (!page)
239 goto out_unlock; 241 goto out_unlock;
@@ -247,18 +249,23 @@ int btrfs_defrag_file(struct file *file)
247 } 249 }
248 } 250 }
249 251
250#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
251 ClearPageDirty(page);
252#else
253 cancel_dirty_page(page, PAGE_CACHE_SIZE);
254#endif
255 wait_on_page_writeback(page); 252 wait_on_page_writeback(page);
256 set_page_extent_mapped(page);
257 253
258 page_start = (u64)page->index << PAGE_CACHE_SHIFT; 254 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
259 page_end = page_start + PAGE_CACHE_SIZE - 1; 255 page_end = page_start + PAGE_CACHE_SIZE - 1;
260
261 lock_extent(io_tree, page_start, page_end, GFP_NOFS); 256 lock_extent(io_tree, page_start, page_end, GFP_NOFS);
257
258 ordered = btrfs_lookup_ordered_extent(inode, page_start);
259 if (ordered) {
260 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
261 unlock_page(page);
262 page_cache_release(page);
263 btrfs_start_ordered_extent(inode, ordered, 1);
264 btrfs_put_ordered_extent(ordered);
265 goto again;
266 }
267 set_page_extent_mapped(page);
268
262 set_extent_delalloc(io_tree, page_start, 269 set_extent_delalloc(io_tree, page_start,
263 page_end, GFP_NOFS); 270 page_end, GFP_NOFS);
264 271
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index e42fd233e04c..676e4bd65c52 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -167,20 +167,28 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
167 entry->file_offset = file_offset; 167 entry->file_offset = file_offset;
168 entry->start = start; 168 entry->start = start;
169 entry->len = len; 169 entry->len = len;
170 entry->inode = inode;
171
170 /* one ref for the tree */ 172 /* one ref for the tree */
171 atomic_set(&entry->refs, 1); 173 atomic_set(&entry->refs, 1);
172 init_waitqueue_head(&entry->wait); 174 init_waitqueue_head(&entry->wait);
173 INIT_LIST_HEAD(&entry->list); 175 INIT_LIST_HEAD(&entry->list);
176 INIT_LIST_HEAD(&entry->root_extent_list);
174 177
175 node = tree_insert(&tree->tree, file_offset, 178 node = tree_insert(&tree->tree, file_offset,
176 &entry->rb_node); 179 &entry->rb_node);
177 if (node) { 180 if (node) {
178 entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); 181 printk("warning dup entry from add_ordered_extent\n");
179 atomic_inc(&entry->refs); 182 BUG();
180 } 183 }
181 set_extent_ordered(&BTRFS_I(inode)->io_tree, file_offset, 184 set_extent_ordered(&BTRFS_I(inode)->io_tree, file_offset,
182 entry_end(entry) - 1, GFP_NOFS); 185 entry_end(entry) - 1, GFP_NOFS);
183 186
187 spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
188 list_add_tail(&entry->root_extent_list,
189 &BTRFS_I(inode)->root->fs_info->ordered_extents);
190 spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
191
184 mutex_unlock(&tree->mutex); 192 mutex_unlock(&tree->mutex);
185 BUG_ON(node); 193 BUG_ON(node);
186 return 0; 194 return 0;
@@ -285,11 +293,55 @@ int btrfs_remove_ordered_extent(struct inode *inode,
285 rb_erase(node, &tree->tree); 293 rb_erase(node, &tree->tree);
286 tree->last = NULL; 294 tree->last = NULL;
287 set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags); 295 set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags);
296
297 spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
298 list_del_init(&entry->root_extent_list);
299 spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
300
288 mutex_unlock(&tree->mutex); 301 mutex_unlock(&tree->mutex);
289 wake_up(&entry->wait); 302 wake_up(&entry->wait);
290 return 0; 303 return 0;
291} 304}
292 305
306int btrfs_wait_ordered_extents(struct btrfs_root *root)
307{
308 struct list_head splice;
309 struct list_head *cur;
310 struct btrfs_ordered_extent *ordered;
311 struct inode *inode;
312
313 INIT_LIST_HEAD(&splice);
314
315 spin_lock(&root->fs_info->ordered_extent_lock);
316 list_splice_init(&root->fs_info->ordered_extents, &splice);
317 while(!list_empty(&splice)) {
318 cur = splice.next;
319 ordered = list_entry(cur, struct btrfs_ordered_extent,
320 root_extent_list);
321 list_del_init(&ordered->root_extent_list);
322 atomic_inc(&ordered->refs);
323 inode = ordered->inode;
324
325 /*
326 * the inode can't go away until all the pages are gone
327 * and the pages won't go away while there is still
328 * an ordered extent and the ordered extent won't go
329 * away until it is off this list. So, we can safely
330 * increment i_count here and call iput later
331 */
332 atomic_inc(&inode->i_count);
333 spin_unlock(&root->fs_info->ordered_extent_lock);
334
335 btrfs_start_ordered_extent(inode, ordered, 1);
336 btrfs_put_ordered_extent(ordered);
337 iput(inode);
338
339 spin_lock(&root->fs_info->ordered_extent_lock);
340 }
341 spin_unlock(&root->fs_info->ordered_extent_lock);
342 return 0;
343}
344
293/* 345/*
294 * Used to start IO or wait for a given ordered extent to finish. 346 * Used to start IO or wait for a given ordered extent to finish.
295 * 347 *
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index 199cb0b4f1d9..5efe6b63c74c 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -80,6 +80,9 @@ struct btrfs_ordered_extent {
80 /* reference count */ 80 /* reference count */
81 atomic_t refs; 81 atomic_t refs;
82 82
83 /* the inode we belong to */
84 struct inode *inode;
85
83 /* list of checksums for insertion when the extent io is done */ 86 /* list of checksums for insertion when the extent io is done */
84 struct list_head list; 87 struct list_head list;
85 88
@@ -88,6 +91,9 @@ struct btrfs_ordered_extent {
88 91
89 /* our friendly rbtree entry */ 92 /* our friendly rbtree entry */
90 struct rb_node rb_node; 93 struct rb_node rb_node;
94
95 /* a per root list of all the pending ordered extents */
96 struct list_head root_extent_list;
91}; 97};
92 98
93 99
@@ -137,4 +143,5 @@ int btrfs_wait_on_page_writeback_range(struct address_space *mapping,
137 pgoff_t start, pgoff_t end); 143 pgoff_t start, pgoff_t end);
138int btrfs_fdatawrite_range(struct address_space *mapping, loff_t start, 144int btrfs_fdatawrite_range(struct address_space *mapping, loff_t start,
139 loff_t end, int sync_mode); 145 loff_t end, int sync_mode);
146int btrfs_wait_ordered_extents(struct btrfs_root *root);
140#endif 147#endif