diff options
-rw-r--r-- | fs/btrfs/ctree.h | 7 | ||||
-rw-r--r-- | fs/btrfs/disk-io.c | 3 | ||||
-rw-r--r-- | fs/btrfs/extent-tree.c | 39 | ||||
-rw-r--r-- | fs/btrfs/ioctl.c | 21 | ||||
-rw-r--r-- | fs/btrfs/ordered-data.c | 56 | ||||
-rw-r--r-- | fs/btrfs/ordered-data.h | 7 |
6 files changed, 113 insertions, 20 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 8ecac2e77a43..6675e916ebcd 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -539,6 +539,13 @@ struct btrfs_fs_info { | |||
539 | atomic_t nr_async_submits; | 539 | atomic_t nr_async_submits; |
540 | 540 | ||
541 | /* | 541 | /* |
542 | * this is used by the balancing code to wait for all the pending | ||
543 | * ordered extents | ||
544 | */ | ||
545 | spinlock_t ordered_extent_lock; | ||
546 | struct list_head ordered_extents; | ||
547 | |||
548 | /* | ||
542 | * there is a pool of worker threads for checksumming during writes | 549 | * there is a pool of worker threads for checksumming during writes |
543 | * and a pool for checksumming after reads. This is because readers | 550 | * and a pool for checksumming after reads. This is because readers |
544 | * can run with FS locks held, and the writers may be waiting for | 551 | * can run with FS locks held, and the writers may be waiting for |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 7ce3f83c5dd6..ec01062eb41d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -1252,6 +1252,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1252 | fs_info->btree_inode->i_nlink = 1; | 1252 | fs_info->btree_inode->i_nlink = 1; |
1253 | fs_info->thread_pool_size = min(num_online_cpus() + 2, 8); | 1253 | fs_info->thread_pool_size = min(num_online_cpus() + 2, 8); |
1254 | 1254 | ||
1255 | INIT_LIST_HEAD(&fs_info->ordered_extents); | ||
1256 | spin_lock_init(&fs_info->ordered_extent_lock); | ||
1257 | |||
1255 | sb->s_blocksize = 4096; | 1258 | sb->s_blocksize = 4096; |
1256 | sb->s_blocksize_bits = blksize_bits(4096); | 1259 | sb->s_blocksize_bits = blksize_bits(4096); |
1257 | 1260 | ||
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index febc6295c7a9..f92b297e7da5 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -2640,6 +2640,7 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start, | |||
2640 | struct file_ra_state *ra; | 2640 | struct file_ra_state *ra; |
2641 | unsigned long total_read = 0; | 2641 | unsigned long total_read = 0; |
2642 | unsigned long ra_pages; | 2642 | unsigned long ra_pages; |
2643 | struct btrfs_ordered_extent *ordered; | ||
2643 | struct btrfs_trans_handle *trans; | 2644 | struct btrfs_trans_handle *trans; |
2644 | 2645 | ||
2645 | ra = kzalloc(sizeof(*ra), GFP_NOFS); | 2646 | ra = kzalloc(sizeof(*ra), GFP_NOFS); |
@@ -2658,9 +2659,9 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start, | |||
2658 | calc_ra(i, last_index, ra_pages)); | 2659 | calc_ra(i, last_index, ra_pages)); |
2659 | } | 2660 | } |
2660 | total_read++; | 2661 | total_read++; |
2661 | if (((u64)i << PAGE_CACHE_SHIFT) > inode->i_size) | 2662 | again: |
2663 | if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode)) | ||
2662 | goto truncate_racing; | 2664 | goto truncate_racing; |
2663 | |||
2664 | page = grab_cache_page(inode->i_mapping, i); | 2665 | page = grab_cache_page(inode->i_mapping, i); |
2665 | if (!page) { | 2666 | if (!page) { |
2666 | goto out_unlock; | 2667 | goto out_unlock; |
@@ -2674,18 +2675,24 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start, | |||
2674 | goto out_unlock; | 2675 | goto out_unlock; |
2675 | } | 2676 | } |
2676 | } | 2677 | } |
2677 | #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) | ||
2678 | ClearPageDirty(page); | ||
2679 | #else | ||
2680 | cancel_dirty_page(page, PAGE_CACHE_SIZE); | ||
2681 | #endif | ||
2682 | wait_on_page_writeback(page); | 2678 | wait_on_page_writeback(page); |
2683 | set_page_extent_mapped(page); | 2679 | |
2684 | page_start = (u64)page->index << PAGE_CACHE_SHIFT; | 2680 | page_start = (u64)page->index << PAGE_CACHE_SHIFT; |
2685 | page_end = page_start + PAGE_CACHE_SIZE - 1; | 2681 | page_end = page_start + PAGE_CACHE_SIZE - 1; |
2686 | |||
2687 | lock_extent(io_tree, page_start, page_end, GFP_NOFS); | 2682 | lock_extent(io_tree, page_start, page_end, GFP_NOFS); |
2688 | 2683 | ||
2684 | ordered = btrfs_lookup_ordered_extent(inode, page_start); | ||
2685 | if (ordered) { | ||
2686 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | ||
2687 | unlock_page(page); | ||
2688 | page_cache_release(page); | ||
2689 | btrfs_start_ordered_extent(inode, ordered, 1); | ||
2690 | btrfs_put_ordered_extent(ordered); | ||
2691 | goto again; | ||
2692 | } | ||
2693 | set_page_extent_mapped(page); | ||
2694 | |||
2695 | |||
2689 | set_extent_delalloc(io_tree, page_start, | 2696 | set_extent_delalloc(io_tree, page_start, |
2690 | page_end, GFP_NOFS); | 2697 | page_end, GFP_NOFS); |
2691 | set_page_dirty(page); | 2698 | set_page_dirty(page); |
@@ -2694,10 +2701,18 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start, | |||
2694 | unlock_page(page); | 2701 | unlock_page(page); |
2695 | page_cache_release(page); | 2702 | page_cache_release(page); |
2696 | } | 2703 | } |
2697 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, | ||
2698 | total_read); | ||
2699 | 2704 | ||
2700 | out_unlock: | 2705 | out_unlock: |
2706 | /* we have to start the IO in order to get the ordered extents | ||
2707 | * instantiated. This allows the relocation code to wait | ||
2708 | * for all the ordered extents to hit the disk. | ||
2709 | * | ||
2710 | * Otherwise, it would constantly loop over the same extents | ||
2711 | * because the old ones don't get deleted until the IO is | ||
2712 | * started | ||
2713 | */ | ||
2714 | btrfs_fdatawrite_range(inode->i_mapping, start, start + len - 1, | ||
2715 | WB_SYNC_NONE); | ||
2701 | kfree(ra); | 2716 | kfree(ra); |
2702 | trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1); | 2717 | trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1); |
2703 | if (trans) { | 2718 | if (trans) { |
@@ -3238,6 +3253,8 @@ next: | |||
3238 | 3253 | ||
3239 | btrfs_clean_old_snapshots(tree_root); | 3254 | btrfs_clean_old_snapshots(tree_root); |
3240 | 3255 | ||
3256 | btrfs_wait_ordered_extents(tree_root); | ||
3257 | |||
3241 | trans = btrfs_start_transaction(tree_root, 1); | 3258 | trans = btrfs_start_transaction(tree_root, 1); |
3242 | btrfs_commit_transaction(trans, tree_root); | 3259 | btrfs_commit_transaction(trans, tree_root); |
3243 | mutex_lock(&root->fs_info->alloc_mutex); | 3260 | mutex_lock(&root->fs_info->alloc_mutex); |
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 83f17a5cbd6a..a61f2e7e2db5 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
@@ -213,6 +213,7 @@ int btrfs_defrag_file(struct file *file) | |||
213 | struct inode *inode = fdentry(file)->d_inode; | 213 | struct inode *inode = fdentry(file)->d_inode; |
214 | struct btrfs_root *root = BTRFS_I(inode)->root; | 214 | struct btrfs_root *root = BTRFS_I(inode)->root; |
215 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 215 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
216 | struct btrfs_ordered_extent *ordered; | ||
216 | struct page *page; | 217 | struct page *page; |
217 | unsigned long last_index; | 218 | unsigned long last_index; |
218 | unsigned long ra_pages = root->fs_info->bdi.ra_pages; | 219 | unsigned long ra_pages = root->fs_info->bdi.ra_pages; |
@@ -234,6 +235,7 @@ int btrfs_defrag_file(struct file *file) | |||
234 | min(last_index, i + ra_pages - 1)); | 235 | min(last_index, i + ra_pages - 1)); |
235 | } | 236 | } |
236 | total_read++; | 237 | total_read++; |
238 | again: | ||
237 | page = grab_cache_page(inode->i_mapping, i); | 239 | page = grab_cache_page(inode->i_mapping, i); |
238 | if (!page) | 240 | if (!page) |
239 | goto out_unlock; | 241 | goto out_unlock; |
@@ -247,18 +249,23 @@ int btrfs_defrag_file(struct file *file) | |||
247 | } | 249 | } |
248 | } | 250 | } |
249 | 251 | ||
250 | #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) | ||
251 | ClearPageDirty(page); | ||
252 | #else | ||
253 | cancel_dirty_page(page, PAGE_CACHE_SIZE); | ||
254 | #endif | ||
255 | wait_on_page_writeback(page); | 252 | wait_on_page_writeback(page); |
256 | set_page_extent_mapped(page); | ||
257 | 253 | ||
258 | page_start = (u64)page->index << PAGE_CACHE_SHIFT; | 254 | page_start = (u64)page->index << PAGE_CACHE_SHIFT; |
259 | page_end = page_start + PAGE_CACHE_SIZE - 1; | 255 | page_end = page_start + PAGE_CACHE_SIZE - 1; |
260 | |||
261 | lock_extent(io_tree, page_start, page_end, GFP_NOFS); | 256 | lock_extent(io_tree, page_start, page_end, GFP_NOFS); |
257 | |||
258 | ordered = btrfs_lookup_ordered_extent(inode, page_start); | ||
259 | if (ordered) { | ||
260 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | ||
261 | unlock_page(page); | ||
262 | page_cache_release(page); | ||
263 | btrfs_start_ordered_extent(inode, ordered, 1); | ||
264 | btrfs_put_ordered_extent(ordered); | ||
265 | goto again; | ||
266 | } | ||
267 | set_page_extent_mapped(page); | ||
268 | |||
262 | set_extent_delalloc(io_tree, page_start, | 269 | set_extent_delalloc(io_tree, page_start, |
263 | page_end, GFP_NOFS); | 270 | page_end, GFP_NOFS); |
264 | 271 | ||
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index e42fd233e04c..676e4bd65c52 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c | |||
@@ -167,20 +167,28 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | |||
167 | entry->file_offset = file_offset; | 167 | entry->file_offset = file_offset; |
168 | entry->start = start; | 168 | entry->start = start; |
169 | entry->len = len; | 169 | entry->len = len; |
170 | entry->inode = inode; | ||
171 | |||
170 | /* one ref for the tree */ | 172 | /* one ref for the tree */ |
171 | atomic_set(&entry->refs, 1); | 173 | atomic_set(&entry->refs, 1); |
172 | init_waitqueue_head(&entry->wait); | 174 | init_waitqueue_head(&entry->wait); |
173 | INIT_LIST_HEAD(&entry->list); | 175 | INIT_LIST_HEAD(&entry->list); |
176 | INIT_LIST_HEAD(&entry->root_extent_list); | ||
174 | 177 | ||
175 | node = tree_insert(&tree->tree, file_offset, | 178 | node = tree_insert(&tree->tree, file_offset, |
176 | &entry->rb_node); | 179 | &entry->rb_node); |
177 | if (node) { | 180 | if (node) { |
178 | entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); | 181 | printk("warning dup entry from add_ordered_extent\n"); |
179 | atomic_inc(&entry->refs); | 182 | BUG(); |
180 | } | 183 | } |
181 | set_extent_ordered(&BTRFS_I(inode)->io_tree, file_offset, | 184 | set_extent_ordered(&BTRFS_I(inode)->io_tree, file_offset, |
182 | entry_end(entry) - 1, GFP_NOFS); | 185 | entry_end(entry) - 1, GFP_NOFS); |
183 | 186 | ||
187 | spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); | ||
188 | list_add_tail(&entry->root_extent_list, | ||
189 | &BTRFS_I(inode)->root->fs_info->ordered_extents); | ||
190 | spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); | ||
191 | |||
184 | mutex_unlock(&tree->mutex); | 192 | mutex_unlock(&tree->mutex); |
185 | BUG_ON(node); | 193 | BUG_ON(node); |
186 | return 0; | 194 | return 0; |
@@ -285,11 +293,55 @@ int btrfs_remove_ordered_extent(struct inode *inode, | |||
285 | rb_erase(node, &tree->tree); | 293 | rb_erase(node, &tree->tree); |
286 | tree->last = NULL; | 294 | tree->last = NULL; |
287 | set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags); | 295 | set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags); |
296 | |||
297 | spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); | ||
298 | list_del_init(&entry->root_extent_list); | ||
299 | spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); | ||
300 | |||
288 | mutex_unlock(&tree->mutex); | 301 | mutex_unlock(&tree->mutex); |
289 | wake_up(&entry->wait); | 302 | wake_up(&entry->wait); |
290 | return 0; | 303 | return 0; |
291 | } | 304 | } |
292 | 305 | ||
306 | int btrfs_wait_ordered_extents(struct btrfs_root *root) | ||
307 | { | ||
308 | struct list_head splice; | ||
309 | struct list_head *cur; | ||
310 | struct btrfs_ordered_extent *ordered; | ||
311 | struct inode *inode; | ||
312 | |||
313 | INIT_LIST_HEAD(&splice); | ||
314 | |||
315 | spin_lock(&root->fs_info->ordered_extent_lock); | ||
316 | list_splice_init(&root->fs_info->ordered_extents, &splice); | ||
317 | while(!list_empty(&splice)) { | ||
318 | cur = splice.next; | ||
319 | ordered = list_entry(cur, struct btrfs_ordered_extent, | ||
320 | root_extent_list); | ||
321 | list_del_init(&ordered->root_extent_list); | ||
322 | atomic_inc(&ordered->refs); | ||
323 | inode = ordered->inode; | ||
324 | |||
325 | /* | ||
326 | * the inode can't go away until all the pages are gone | ||
327 | * and the pages won't go away while there is still | ||
328 | * an ordered extent and the ordered extent won't go | ||
329 | * away until it is off this list. So, we can safely | ||
330 | * increment i_count here and call iput later | ||
331 | */ | ||
332 | atomic_inc(&inode->i_count); | ||
333 | spin_unlock(&root->fs_info->ordered_extent_lock); | ||
334 | |||
335 | btrfs_start_ordered_extent(inode, ordered, 1); | ||
336 | btrfs_put_ordered_extent(ordered); | ||
337 | iput(inode); | ||
338 | |||
339 | spin_lock(&root->fs_info->ordered_extent_lock); | ||
340 | } | ||
341 | spin_unlock(&root->fs_info->ordered_extent_lock); | ||
342 | return 0; | ||
343 | } | ||
344 | |||
293 | /* | 345 | /* |
294 | * Used to start IO or wait for a given ordered extent to finish. | 346 | * Used to start IO or wait for a given ordered extent to finish. |
295 | * | 347 | * |
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 199cb0b4f1d9..5efe6b63c74c 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h | |||
@@ -80,6 +80,9 @@ struct btrfs_ordered_extent { | |||
80 | /* reference count */ | 80 | /* reference count */ |
81 | atomic_t refs; | 81 | atomic_t refs; |
82 | 82 | ||
83 | /* the inode we belong to */ | ||
84 | struct inode *inode; | ||
85 | |||
83 | /* list of checksums for insertion when the extent io is done */ | 86 | /* list of checksums for insertion when the extent io is done */ |
84 | struct list_head list; | 87 | struct list_head list; |
85 | 88 | ||
@@ -88,6 +91,9 @@ struct btrfs_ordered_extent { | |||
88 | 91 | ||
89 | /* our friendly rbtree entry */ | 92 | /* our friendly rbtree entry */ |
90 | struct rb_node rb_node; | 93 | struct rb_node rb_node; |
94 | |||
95 | /* a per root list of all the pending ordered extents */ | ||
96 | struct list_head root_extent_list; | ||
91 | }; | 97 | }; |
92 | 98 | ||
93 | 99 | ||
@@ -137,4 +143,5 @@ int btrfs_wait_on_page_writeback_range(struct address_space *mapping, | |||
137 | pgoff_t start, pgoff_t end); | 143 | pgoff_t start, pgoff_t end); |
138 | int btrfs_fdatawrite_range(struct address_space *mapping, loff_t start, | 144 | int btrfs_fdatawrite_range(struct address_space *mapping, loff_t start, |
139 | loff_t end, int sync_mode); | 145 | loff_t end, int sync_mode); |
146 | int btrfs_wait_ordered_extents(struct btrfs_root *root); | ||
140 | #endif | 147 | #endif |