 fs/btrfs/compression.c  | 150
 fs/btrfs/ctree.h        |   4
 fs/btrfs/disk-io.c      |  27
 fs/btrfs/extent-tree.c  |   6
 fs/btrfs/extent_io.c    | 140
 fs/btrfs/extent_io.h    |  13
 fs/btrfs/file.c         |  53
 fs/btrfs/inode.c        | 643
 fs/btrfs/ordered-data.c |  13
 fs/btrfs/super.c        |   4
 fs/btrfs/zlib.c         |   3
 11 files changed, 849 insertions(+), 207 deletions(-)
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 354913177ba6..284f21025bcc 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -33,6 +33,7 @@
 #include <linux/writeback.h>
 #include <linux/bit_spinlock.h>
 #include <linux/version.h>
+#include <linux/pagevec.h>
 #include "ctree.h"
 #include "disk-io.h"
 #include "transaction.h"
@@ -145,9 +146,9 @@ static void end_compressed_bio_read(struct bio *bio, int err)
 	}

 	/* do io completion on the original bio */
-	if (cb->errors)
+	if (cb->errors) {
 		bio_io_error(cb->orig_bio);
-	else
+	} else
 		bio_endio(cb->orig_bio, 0);

 	/* finally free the cb struct */
@@ -333,6 +334,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
 		}
 		bytes_left -= PAGE_CACHE_SIZE;
 		first_byte += PAGE_CACHE_SIZE;
+		cond_resched();
 	}
 	bio_get(bio);

@@ -346,6 +348,130 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
 	return 0;
 }

+static noinline int add_ra_bio_pages(struct inode *inode,
+				     u64 compressed_end,
+				     struct compressed_bio *cb)
+{
+	unsigned long end_index;
+	unsigned long page_index;
+	u64 last_offset;
+	u64 isize = i_size_read(inode);
+	int ret;
+	struct page *page;
+	unsigned long nr_pages = 0;
+	struct extent_map *em;
+	struct address_space *mapping = inode->i_mapping;
+	struct pagevec pvec;
+	struct extent_map_tree *em_tree;
+	struct extent_io_tree *tree;
+	u64 end;
+	int misses = 0;
+
+	page = cb->orig_bio->bi_io_vec[cb->orig_bio->bi_vcnt - 1].bv_page;
+	last_offset = (page_offset(page) + PAGE_CACHE_SIZE);
+	em_tree = &BTRFS_I(inode)->extent_tree;
+	tree = &BTRFS_I(inode)->io_tree;
+
+	if (isize == 0)
+		return 0;
+
+	end_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
+
+	pagevec_init(&pvec, 0);
+	while(last_offset < compressed_end) {
+		page_index = last_offset >> PAGE_CACHE_SHIFT;
+
+		if (page_index > end_index)
+			break;
+
+		rcu_read_lock();
+		page = radix_tree_lookup(&mapping->page_tree, page_index);
+		rcu_read_unlock();
+		if (page) {
+			misses++;
+			if (misses > 4)
+				break;
+			goto next;
+		}
+
+		page = alloc_page(mapping_gfp_mask(mapping) | GFP_NOFS);
+		if (!page)
+			break;
+
+		page->index = page_index;
+		/*
+		 * what we want to do here is call add_to_page_cache_lru,
+		 * but that isn't exported, so we reproduce it here
+		 */
+		if (add_to_page_cache(page, mapping,
+				      page->index, GFP_NOFS)) {
+			page_cache_release(page);
+			goto next;
+		}
+
+		/* open coding of lru_cache_add, also not exported */
+		page_cache_get(page);
+		if (!pagevec_add(&pvec, page))
+			__pagevec_lru_add(&pvec);
+
+		end = last_offset + PAGE_CACHE_SIZE - 1;
+		/*
+		 * at this point, we have a locked page in the page cache
+		 * for these bytes in the file.  But, we have to make
+		 * sure they map to this compressed extent on disk.
+		 */
+		set_page_extent_mapped(page);
+		lock_extent(tree, last_offset, end, GFP_NOFS);
+		spin_lock(&em_tree->lock);
+		em = lookup_extent_mapping(em_tree, last_offset,
+					   PAGE_CACHE_SIZE);
+		spin_unlock(&em_tree->lock);
+
+		if (!em || last_offset < em->start ||
+		    (last_offset + PAGE_CACHE_SIZE > extent_map_end(em)) ||
+		    (em->block_start >> 9) != cb->orig_bio->bi_sector) {
+			free_extent_map(em);
+			unlock_extent(tree, last_offset, end, GFP_NOFS);
+			unlock_page(page);
+			page_cache_release(page);
+			break;
+		}
+		free_extent_map(em);
+
+		if (page->index == end_index) {
+			char *userpage;
+			size_t zero_offset = isize & (PAGE_CACHE_SIZE - 1);
+
+			if (zero_offset) {
+				int zeros;
+				zeros = PAGE_CACHE_SIZE - zero_offset;
+				userpage = kmap_atomic(page, KM_USER0);
+				memset(userpage + zero_offset, 0, zeros);
+				flush_dcache_page(page);
+				kunmap_atomic(userpage, KM_USER0);
+			}
+		}
+
+		ret = bio_add_page(cb->orig_bio, page,
+				   PAGE_CACHE_SIZE, 0);
+
+		if (ret == PAGE_CACHE_SIZE) {
+			nr_pages++;
+			page_cache_release(page);
+		} else {
+			unlock_extent(tree, last_offset, end, GFP_NOFS);
+			unlock_page(page);
+			page_cache_release(page);
+			break;
+		}
+next:
+		last_offset += PAGE_CACHE_SIZE;
+	}
+	if (pagevec_count(&pvec))
+		__pagevec_lru_add(&pvec);
+	return 0;
+}
+
 /*
  * for a compressed read, the bio we get passed has all the inode pages
  * in it.  We don't actually do IO on those pages but allocate new ones
@@ -373,6 +499,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 	struct block_device *bdev;
 	struct bio *comp_bio;
 	u64 cur_disk_byte = (u64)bio->bi_sector << 9;
+	u64 em_len;
 	struct extent_map *em;
 	int ret;

@@ -393,6 +520,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,

 	cb->start = em->start;
 	compressed_len = em->block_len;
+	em_len = em->len;
 	free_extent_map(em);

 	cb->len = uncompressed_len;
@@ -411,6 +539,17 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 	}
 	cb->nr_pages = nr_pages;

+	add_ra_bio_pages(inode, cb->start + em_len, cb);
+
+	if (!btrfs_test_opt(root, NODATASUM) &&
+	    !btrfs_test_flag(inode, NODATASUM)) {
+		btrfs_lookup_bio_sums(root, inode, cb->orig_bio);
+	}
+
+	/* include any pages we added in add_ra-bio_pages */
+	uncompressed_len = bio->bi_vcnt * PAGE_CACHE_SIZE;
+	cb->len = uncompressed_len;
+
 	comp_bio = compressed_bio_alloc(bdev, cur_disk_byte, GFP_NOFS);
 	comp_bio->bi_private = cb;
 	comp_bio->bi_end_io = end_compressed_bio_read;
@@ -442,9 +581,10 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 			comp_bio = compressed_bio_alloc(bdev, cur_disk_byte,
 							GFP_NOFS);
 			atomic_inc(&cb->pending_bios);
-			bio->bi_private = cb;
-			bio->bi_end_io = end_compressed_bio_write;
-			bio_add_page(bio, page, PAGE_CACHE_SIZE, 0);
+			comp_bio->bi_private = cb;
+			comp_bio->bi_end_io = end_compressed_bio_read;
+
+			bio_add_page(comp_bio, page, PAGE_CACHE_SIZE, 0);
 		}
 		cur_disk_byte += PAGE_CACHE_SIZE;
 	}
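
[Note on add_ra_bio_pages() above: readahead can pull in the file's last page, and any bytes past i_size in that page must be zeroed so decompression scratch data never reaches userspace. A minimal userspace sketch of the same tail arithmetic; PAGE_SIZE and the file size here are assumed example values, not btrfs code.]

    #include <stdio.h>
    #include <string.h>

    #define PAGE_SIZE 4096UL

    int main(void)
    {
        unsigned long isize = 10000;                  /* hypothetical file size */
        size_t zero_offset = isize & (PAGE_SIZE - 1); /* offset of EOF within the last page */
        size_t zeros = PAGE_SIZE - zero_offset;       /* bytes past EOF to clear */

        char page[PAGE_SIZE];
        memset(page, 0xAA, sizeof(page));             /* stand-in for stale page contents */
        if (zero_offset)                              /* aligned i_size needs no zeroing */
            memset(page + zero_offset, 0, zeros);     /* mirrors the kmap/memset above */

        printf("zero from byte %zu, %zu bytes\n", zero_offset, zeros);
        return 0;
    }
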
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 689df070c8e9..c83cc5b2ded7 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -625,8 +625,8 @@ struct btrfs_fs_info {
 	struct btrfs_transaction *running_transaction;
 	wait_queue_head_t transaction_throttle;
 	wait_queue_head_t transaction_wait;
-	wait_queue_head_t async_submit_wait;

+	wait_queue_head_t async_submit_wait;
 	wait_queue_head_t tree_log_wait;

 	struct btrfs_super_block super_copy;
@@ -653,6 +653,7 @@ struct btrfs_fs_info {
 	atomic_t nr_async_submits;
 	atomic_t async_submit_draining;
 	atomic_t nr_async_bios;
+	atomic_t async_delalloc_pages;
 	atomic_t tree_log_writers;
 	atomic_t tree_log_commit;
 	unsigned long tree_log_batch;
@@ -677,6 +678,7 @@ struct btrfs_fs_info {
 	 * two
 	 */
 	struct btrfs_workers workers;
+	struct btrfs_workers delalloc_workers;
 	struct btrfs_workers endio_workers;
 	struct btrfs_workers endio_write_workers;
 	struct btrfs_workers submit_workers;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index e0a28f705a64..8efc123d222b 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -539,6 +539,13 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
 			   (atomic_read(&fs_info->nr_async_bios) < limit),
 			   HZ/10);
 	}
+
+	while(atomic_read(&fs_info->async_submit_draining) &&
+	      atomic_read(&fs_info->nr_async_submits)) {
+		wait_event(fs_info->async_submit_wait,
+			   (atomic_read(&fs_info->nr_async_submits) == 0));
+	}
+
 	return 0;
 }

@@ -1437,6 +1444,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 	INIT_LIST_HEAD(&fs_info->space_info);
 	btrfs_mapping_init(&fs_info->mapping_tree);
 	atomic_set(&fs_info->nr_async_submits, 0);
+	atomic_set(&fs_info->async_delalloc_pages, 0);
 	atomic_set(&fs_info->async_submit_draining, 0);
 	atomic_set(&fs_info->nr_async_bios, 0);
 	atomic_set(&fs_info->throttles, 0);
@@ -1550,6 +1558,9 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 	btrfs_init_workers(&fs_info->workers, "worker",
 			   fs_info->thread_pool_size);

+	btrfs_init_workers(&fs_info->delalloc_workers, "delalloc",
+			   fs_info->thread_pool_size);
+
 	btrfs_init_workers(&fs_info->submit_workers, "submit",
 			   min_t(u64, fs_devices->num_devices,
 			   fs_info->thread_pool_size));
@@ -1560,15 +1571,12 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 	 */
 	fs_info->submit_workers.idle_thresh = 64;

-	/* fs_info->workers is responsible for checksumming file data
-	 * blocks and metadata.  Using a larger idle thresh allows each
-	 * worker thread to operate on things in roughly the order they
-	 * were sent by the writeback daemons, improving overall locality
-	 * of the IO going down the pipe.
-	 */
-	fs_info->workers.idle_thresh = 8;
+	fs_info->workers.idle_thresh = 16;
 	fs_info->workers.ordered = 1;

+	fs_info->delalloc_workers.idle_thresh = 2;
+	fs_info->delalloc_workers.ordered = 1;
+
 	btrfs_init_workers(&fs_info->fixup_workers, "fixup", 1);
 	btrfs_init_workers(&fs_info->endio_workers, "endio",
 			   fs_info->thread_pool_size);
@@ -1584,6 +1592,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,

 	btrfs_start_workers(&fs_info->workers, 1);
 	btrfs_start_workers(&fs_info->submit_workers, 1);
+	btrfs_start_workers(&fs_info->delalloc_workers, 1);
 	btrfs_start_workers(&fs_info->fixup_workers, 1);
 	btrfs_start_workers(&fs_info->endio_workers, fs_info->thread_pool_size);
 	btrfs_start_workers(&fs_info->endio_write_workers,
@@ -1732,6 +1741,7 @@ fail_tree_root:
 fail_sys_array:
 fail_sb_buffer:
 	btrfs_stop_workers(&fs_info->fixup_workers);
+	btrfs_stop_workers(&fs_info->delalloc_workers);
 	btrfs_stop_workers(&fs_info->workers);
 	btrfs_stop_workers(&fs_info->endio_workers);
 	btrfs_stop_workers(&fs_info->endio_write_workers);
@@ -1988,6 +1998,7 @@ int close_ctree(struct btrfs_root *root)
 	truncate_inode_pages(fs_info->btree_inode->i_mapping, 0);

 	btrfs_stop_workers(&fs_info->fixup_workers);
+	btrfs_stop_workers(&fs_info->delalloc_workers);
 	btrfs_stop_workers(&fs_info->workers);
 	btrfs_stop_workers(&fs_info->endio_workers);
 	btrfs_stop_workers(&fs_info->endio_write_workers);
@@ -2062,7 +2073,7 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
 	struct extent_io_tree *tree;
 	u64 num_dirty;
 	u64 start = 0;
-	unsigned long thresh = 96 * 1024 * 1024;
+	unsigned long thresh = 32 * 1024 * 1024;
 	tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree;

 	if (current_is_pdflush() || current->flags & PF_MEMALLOC)
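
[The new loop in btrfs_wq_submit_bio() acts as a drain gate: while someone has flagged async_submit_draining, submitters wait until nr_async_submits falls to zero. A rough userspace analogue with pthreads follows; this is an assumption-laden sketch, since the kernel uses a wait queue plus atomics rather than a mutex/condvar, and the names are invented for illustration.]

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t drained = PTHREAD_COND_INITIALIZER; /* ~async_submit_wait */
    static int draining;       /* ~fs_info->async_submit_draining */
    static int nr_in_flight;   /* ~fs_info->nr_async_submits */

    static void submit_one(void)
    {
        pthread_mutex_lock(&lock);
        while (draining && nr_in_flight)      /* same condition as the patch */
            pthread_cond_wait(&drained, &lock);
        nr_in_flight++;
        pthread_mutex_unlock(&lock);

        /* ... perform the submission work, then retire it: */
        pthread_mutex_lock(&lock);
        if (--nr_in_flight == 0)
            pthread_cond_broadcast(&drained); /* ~wake_up(&async_submit_wait) */
        pthread_mutex_unlock(&lock);
    }

    int main(void)
    {
        submit_one();
        printf("in flight: %d\n", nr_in_flight);
        return 0;
    }
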
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 8af39521eb71..ebd8275a1934 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -768,7 +768,11 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
 	l = path->nodes[0];

 	btrfs_item_key_to_cpu(l, &key, path->slots[0]);
-	BUG_ON(key.objectid != bytenr);
+	if (key.objectid != bytenr) {
+		btrfs_print_leaf(root->fs_info->extent_root, path->nodes[0]);
+		printk("wanted %Lu found %Lu\n", bytenr, key.objectid);
+		BUG();
+	}
 	BUG_ON(key.type != BTRFS_EXTENT_ITEM_KEY);

 	item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 9b37ce6e5168..bbe3bcfcf4ae 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -47,6 +47,11 @@ struct extent_page_data {
 	struct bio *bio;
 	struct extent_io_tree *tree;
 	get_extent_t *get_extent;
+
+	/* tells writepage not to lock the state bits for this range
+	 * it still does the unlocking
+	 */
+	int extent_locked;
 };

 int __init extent_io_init(void)
@@ -1198,11 +1203,18 @@ static noinline int lock_delalloc_pages(struct inode *inode,
 			 * the caller is taking responsibility for
 			 * locked_page
 			 */
-			if (pages[i] != locked_page)
+			if (pages[i] != locked_page) {
 				lock_page(pages[i]);
+				if (pages[i]->mapping != inode->i_mapping) {
+					ret = -EAGAIN;
+					unlock_page(pages[i]);
+					page_cache_release(pages[i]);
+					goto done;
+				}
+			}
 			page_cache_release(pages[i]);
+			pages_locked++;
 		}
-		pages_locked += ret;
 		nrpages -= ret;
 		index += ret;
 		cond_resched();
@@ -1262,8 +1274,7 @@ again:
 	 * if we're looping.
 	 */
 	if (delalloc_end + 1 - delalloc_start > max_bytes && loops) {
-		delalloc_end = (delalloc_start + PAGE_CACHE_SIZE - 1) &
-			~((u64)PAGE_CACHE_SIZE - 1);
+		delalloc_end = delalloc_start + PAGE_CACHE_SIZE - 1;
 	}
 	/* step two, lock all the pages after the page that has start */
 	ret = lock_delalloc_pages(inode, locked_page,
@@ -1306,7 +1317,10 @@ out_failed:
 int extent_clear_unlock_delalloc(struct inode *inode,
 				 struct extent_io_tree *tree,
 				 u64 start, u64 end, struct page *locked_page,
-				 int clear_dirty, int set_writeback,
+				 int unlock_pages,
+				 int clear_unlock,
+				 int clear_delalloc, int clear_dirty,
+				 int set_writeback,
 				 int end_writeback)
 {
 	int ret;
@@ -1315,12 +1329,19 @@ int extent_clear_unlock_delalloc(struct inode *inode,
 	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
 	unsigned long nr_pages = end_index - index + 1;
 	int i;
-	int clear_bits = EXTENT_LOCKED | EXTENT_DELALLOC;
+	int clear_bits = 0;

+	if (clear_unlock)
+		clear_bits |= EXTENT_LOCKED;
 	if (clear_dirty)
 		clear_bits |= EXTENT_DIRTY;

+	if (clear_delalloc)
+		clear_bits |= EXTENT_DELALLOC;
+
 	clear_extent_bit(tree, start, end, clear_bits, 1, 0, GFP_NOFS);
+	if (!(unlock_pages || clear_dirty || set_writeback || end_writeback))
+		return 0;

 	while(nr_pages > 0) {
 		ret = find_get_pages_contig(inode->i_mapping, index,
@@ -1336,7 +1357,8 @@ int extent_clear_unlock_delalloc(struct inode *inode,
 				set_page_writeback(pages[i]);
 			if (end_writeback)
 				end_page_writeback(pages[i]);
-			unlock_page(pages[i]);
+			if (unlock_pages)
+				unlock_page(pages[i]);
 			page_cache_release(pages[i]);
 		}
 		nr_pages -= ret;
@@ -1741,9 +1763,10 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
 			}
 		}

-		if (uptodate)
+		if (uptodate) {
 			set_extent_uptodate(tree, start, end,
 					    GFP_ATOMIC);
+		}
 		unlock_extent(tree, start, end, GFP_ATOMIC);

 		if (whole_page) {
@@ -1925,6 +1948,7 @@ void set_page_extent_mapped(struct page *page)
 		set_page_private(page, EXTENT_PAGE_PRIVATE);
 	}
 }
+EXPORT_SYMBOL(set_page_extent_mapped);

 void set_page_extent_head(struct page *page, unsigned long len)
 {
@@ -2143,12 +2167,17 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 	u64 delalloc_end;
 	int page_started;
 	int compressed;
+	unsigned long nr_written = 0;

 	WARN_ON(!PageLocked(page));
 	pg_offset = i_size & (PAGE_CACHE_SIZE - 1);
 	if (page->index > end_index ||
 	   (page->index == end_index && !pg_offset)) {
-		page->mapping->a_ops->invalidatepage(page, 0);
+		if (epd->extent_locked) {
+			if (tree->ops && tree->ops->writepage_end_io_hook)
+				tree->ops->writepage_end_io_hook(page, start,
+							 page_end, NULL, 1);
+		}
 		unlock_page(page);
 		return 0;
 	}
@@ -2169,27 +2198,33 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 	delalloc_start = start;
 	delalloc_end = 0;
 	page_started = 0;
-	while(delalloc_end < page_end) {
-		nr_delalloc = find_lock_delalloc_range(inode, tree,
+	if (!epd->extent_locked) {
+		while(delalloc_end < page_end) {
+			nr_delalloc = find_lock_delalloc_range(inode, tree,
 						       page,
 						       &delalloc_start,
 						       &delalloc_end,
 						       128 * 1024 * 1024);
 			if (nr_delalloc == 0) {
+				delalloc_start = delalloc_end + 1;
+				continue;
+			}
+			tree->ops->fill_delalloc(inode, page, delalloc_start,
+						 delalloc_end, &page_started,
+						 &nr_written);
 			delalloc_start = delalloc_end + 1;
-			continue;
 		}
-		tree->ops->fill_delalloc(inode, page, delalloc_start,
-					 delalloc_end, &page_started);
-		delalloc_start = delalloc_end + 1;
-	}

-	/* did the fill delalloc function already unlock and start the IO? */
-	if (page_started) {
-		return 0;
+		/* did the fill delalloc function already unlock and start
+		 * the IO?
+		 */
+		if (page_started) {
+			ret = 0;
+			goto update_nr_written;
+		}
 	}
-
 	lock_extent(tree, start, page_end, GFP_NOFS);
+
 	unlock_start = start;

@@ -2199,10 +2234,13 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 			unlock_extent(tree, start, page_end, GFP_NOFS);
 			redirty_page_for_writepage(wbc, page);
 			unlock_page(page);
-			return 0;
+			ret = 0;
+			goto update_nr_written;
 		}
 	}

+	nr_written++;
+
 	end = page_end;
 	if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) {
 		printk("found delalloc bits after lock_extent\n");
@@ -2333,6 +2371,12 @@ done:
 	if (unlock_start <= page_end)
 		unlock_extent(tree, unlock_start, page_end, GFP_NOFS);
 	unlock_page(page);
+
+update_nr_written:
+	wbc->nr_to_write -= nr_written;
+	if (wbc->range_cyclic || (wbc->nr_to_write > 0 &&
+	    wbc->range_start == 0 && wbc->range_end == LLONG_MAX))
+		page->mapping->writeback_index = page->index + nr_written;
 	return 0;
 }

@@ -2431,7 +2475,7 @@ retry:
 				unlock_page(page);
 				ret = 0;
 			}
-			if (ret || (--(wbc->nr_to_write) <= 0))
+			if (ret || wbc->nr_to_write <= 0)
 				done = 1;
 			if (wbc->nonblocking && bdi_write_congested(bdi)) {
 				wbc->encountered_congestion = 1;
@@ -2452,6 +2496,8 @@ retry:
 	}
 	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
 		mapping->writeback_index = index;
+	if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
+		range_whole = 1;

 	if (wbc->range_cont)
 		wbc->range_start = index << PAGE_CACHE_SHIFT;
@@ -2469,6 +2515,7 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
 		.bio = NULL,
 		.tree = tree,
 		.get_extent = get_extent,
+		.extent_locked = 0,
 	};
 	struct writeback_control wbc_writepages = {
 		.bdi = wbc->bdi,
@@ -2491,6 +2538,52 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
 }
 EXPORT_SYMBOL(extent_write_full_page);

+int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
+			      u64 start, u64 end, get_extent_t *get_extent,
+			      int mode)
+{
+	int ret = 0;
+	struct address_space *mapping = inode->i_mapping;
+	struct page *page;
+	unsigned long nr_pages = (end - start + PAGE_CACHE_SIZE) >>
+		PAGE_CACHE_SHIFT;
+
+	struct extent_page_data epd = {
+		.bio = NULL,
+		.tree = tree,
+		.get_extent = get_extent,
+		.extent_locked = 1,
+	};
+	struct writeback_control wbc_writepages = {
+		.bdi = inode->i_mapping->backing_dev_info,
+		.sync_mode = mode,
+		.older_than_this = NULL,
+		.nr_to_write = nr_pages * 2,
+		.range_start = start,
+		.range_end = end + 1,
+	};
+
+	while(start <= end) {
+		page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT);
+		if (clear_page_dirty_for_io(page))
+			ret = __extent_writepage(page, &wbc_writepages, &epd);
+		else {
+			if (tree->ops && tree->ops->writepage_end_io_hook)
+				tree->ops->writepage_end_io_hook(page, start,
+						 start + PAGE_CACHE_SIZE - 1,
+						 NULL, 1);
+			unlock_page(page);
+		}
+		page_cache_release(page);
+		start += PAGE_CACHE_SIZE;
+	}
+
+	if (epd.bio)
+		submit_one_bio(WRITE, epd.bio, 0, 0);
+	return ret;
+}
+EXPORT_SYMBOL(extent_write_locked_range);
+

 int extent_writepages(struct extent_io_tree *tree,
 		      struct address_space *mapping,
@@ -2502,6 +2595,7 @@ int extent_writepages(struct extent_io_tree *tree,
 		.bio = NULL,
 		.tree = tree,
 		.get_extent = get_extent,
+		.extent_locked = 0,
 	};

 	ret = extent_write_cache_pages(tree, mapping, wbc,
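
[extent_write_locked_range() above sizes wbc.nr_to_write from an inclusive byte range: because end is inclusive, (end - start + PAGE_CACHE_SIZE) >> PAGE_CACHE_SHIFT equals ceil(length / page_size) for a page-aligned start. A standalone check of that arithmetic, with a 4k page size assumed for illustration:]

    #include <stdio.h>

    #define PAGE_SHIFT 12
    #define PAGE_SIZE  (1UL << PAGE_SHIFT)

    int main(void)
    {
        /* hypothetical inclusive byte range covering three pages */
        unsigned long long start = 0, end = 3 * PAGE_SIZE - 1;
        unsigned long nr_pages = (end - start + PAGE_SIZE) >> PAGE_SHIFT;

        printf("nr_pages = %lu\n", nr_pages); /* prints 3 */
        return 0;
    }
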
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 283110ec4ee0..2d5f67065b69 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -35,7 +35,8 @@ typedef int (extent_submit_bio_hook_t)(struct inode *inode, int rw,
 				       unsigned long bio_flags);
 struct extent_io_ops {
 	int (*fill_delalloc)(struct inode *inode, struct page *locked_page,
-			     u64 start, u64 end, int *page_started);
+			     u64 start, u64 end, int *page_started,
+			     unsigned long *nr_written);
 	int (*writepage_start_hook)(struct page *page, u64 start, u64 end);
 	int (*writepage_io_hook)(struct page *page, u64 start, u64 end);
 	extent_submit_bio_hook_t *submit_bio_hook;
@@ -172,6 +173,9 @@ int extent_invalidatepage(struct extent_io_tree *tree,
 int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
 			   get_extent_t *get_extent,
 			   struct writeback_control *wbc);
+int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
+			      u64 start, u64 end, get_extent_t *get_extent,
+			      int mode);
 int extent_writepages(struct extent_io_tree *tree,
 		      struct address_space *mapping,
 		      get_extent_t *get_extent,
@@ -256,6 +260,9 @@ int extent_range_uptodate(struct extent_io_tree *tree,
 int extent_clear_unlock_delalloc(struct inode *inode,
 				 struct extent_io_tree *tree,
 				 u64 start, u64 end, struct page *locked_page,
-				 int clear_dirty, int set_writeback,
-				 int clear_writeback);
+				 int unlock_page,
+				 int clear_unlock,
+				 int clear_delalloc, int clear_dirty,
+				 int set_writeback,
+				 int end_writeback);
 #endif
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 0c8cc35a8b97..337221ecca27 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -368,6 +368,8 @@ int noinline btrfs_drop_extents(struct btrfs_trans_handle *trans,
 	u64 search_start = start;
 	u64 leaf_start;
 	u64 ram_bytes = 0;
+	u64 orig_parent = 0;
+	u64 disk_bytenr = 0;
 	u8 compression;
 	u8 encryption;
 	u16 other_encoding = 0;
@@ -500,17 +502,31 @@ next_slot:
 			keep = 1;
 		}

-		if (bookend && found_extent && locked_end < extent_end) {
-			ret = try_lock_extent(&BTRFS_I(inode)->io_tree,
-				locked_end, extent_end - 1, GFP_NOFS);
-			if (!ret) {
-				btrfs_release_path(root, path);
-				lock_extent(&BTRFS_I(inode)->io_tree,
-					locked_end, extent_end - 1, GFP_NOFS);
+		if (bookend && found_extent) {
+			if (locked_end < extent_end) {
+				ret = try_lock_extent(&BTRFS_I(inode)->io_tree,
+						locked_end, extent_end - 1,
+						GFP_NOFS);
+				if (!ret) {
+					btrfs_release_path(root, path);
+					lock_extent(&BTRFS_I(inode)->io_tree,
+						locked_end, extent_end - 1,
+						GFP_NOFS);
+					locked_end = extent_end;
+					continue;
+				}
 				locked_end = extent_end;
-				continue;
 			}
-			locked_end = extent_end;
+			orig_parent = path->nodes[0]->start;
+			disk_bytenr = le64_to_cpu(old.disk_bytenr);
+			if (disk_bytenr != 0) {
+				ret = btrfs_inc_extent_ref(trans, root,
+					   disk_bytenr,
+					   le64_to_cpu(old.disk_num_bytes),
+					   orig_parent, root->root_key.objectid,
+					   trans->transid, inode->i_ino);
+				BUG_ON(ret);
+			}
 		}

 		if (found_inline) {
@@ -537,8 +553,12 @@ next_slot:
 				inode_sub_bytes(inode, old_num -
 						new_num);
 			}
-			btrfs_set_file_extent_num_bytes(leaf, extent,
-							new_num);
+			if (!compression && !encryption) {
+				btrfs_set_file_extent_ram_bytes(leaf,
+						extent, new_num);
+			}
+			btrfs_set_file_extent_num_bytes(leaf,
+							extent, new_num);
 			btrfs_mark_buffer_dirty(leaf);
 		} else if (key.offset < inline_limit &&
 			   (end > extent_end) &&
@@ -582,11 +602,11 @@ next_slot:
 		}
 		/* create bookend, splitting the extent in two */
 		if (bookend && found_extent) {
-			u64 disk_bytenr;
 			struct btrfs_key ins;
 			ins.objectid = inode->i_ino;
 			ins.offset = end;
 			btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY);
+
 			btrfs_release_path(root, path);
 			ret = btrfs_insert_empty_item(trans, root, path, &ins,
 						      sizeof(*extent));
@@ -623,14 +643,13 @@ next_slot:

 			btrfs_mark_buffer_dirty(path->nodes[0]);

-			disk_bytenr = le64_to_cpu(old.disk_bytenr);
 			if (disk_bytenr != 0) {
-				ret = btrfs_inc_extent_ref(trans, root,
-					   disk_bytenr,
-					   le64_to_cpu(old.disk_num_bytes),
-					   leaf->start,
+				ret = btrfs_update_extent_ref(trans, root,
+					    disk_bytenr, orig_parent,
+					    leaf->start,
 					    root->root_key.objectid,
 					    trans->transid, ins.objectid);
+
 				BUG_ON(ret);
 			}
 			btrfs_release_path(root, path);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 3df0ffad976e..e01c0d0310ab 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -86,6 +86,10 @@ static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {

 static void btrfs_truncate(struct inode *inode);
 static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end);
+static noinline int cow_file_range(struct inode *inode,
+				   struct page *locked_page,
+				   u64 start, u64 end, int *page_started,
+				   unsigned long *nr_written, int unlock);

 /*
  * a very lame attempt at stopping writes when the FS is 85% full.  There
@@ -262,35 +266,72 @@ static int cow_file_range_inline(struct btrfs_trans_handle *trans,
 	return 0;
 }

+struct async_extent {
+	u64 start;
+	u64 ram_size;
+	u64 compressed_size;
+	struct page **pages;
+	unsigned long nr_pages;
+	struct list_head list;
+};
+
+struct async_cow {
+	struct inode *inode;
+	struct btrfs_root *root;
+	struct page *locked_page;
+	u64 start;
+	u64 end;
+	struct list_head extents;
+	struct btrfs_work work;
+};
+
+static noinline int add_async_extent(struct async_cow *cow,
+				     u64 start, u64 ram_size,
+				     u64 compressed_size,
+				     struct page **pages,
+				     unsigned long nr_pages)
+{
+	struct async_extent *async_extent;
+
+	async_extent = kmalloc(sizeof(*async_extent), GFP_NOFS);
+	async_extent->start = start;
+	async_extent->ram_size = ram_size;
+	async_extent->compressed_size = compressed_size;
+	async_extent->pages = pages;
+	async_extent->nr_pages = nr_pages;
+	list_add_tail(&async_extent->list, &cow->extents);
+	return 0;
+}
+
 /*
- * when extent_io.c finds a delayed allocation range in the file,
- * the call backs end up in this code.  The basic idea is to
- * allocate extents on disk for the range, and create ordered data structs
- * in ram to track those extents.
+ * we create compressed extents in two phases.  The first
+ * phase compresses a range of pages that have already been
+ * locked (both pages and state bits are locked).
  *
- * locked_page is the page that writepage had locked already.  We use
- * it to make sure we don't do extra locks or unlocks.
+ * This is done inside an ordered work queue, and the compression
+ * is spread across many cpus.  The actual IO submission is step
+ * two, and the ordered work queue takes care of making sure that
+ * happens in the same order things were put onto the queue by
+ * writepages and friends.
  *
- * *page_started is set to one if we unlock locked_page and do everything
- * required to start IO on it.  It may be clean and already done with
- * IO when we return.
+ * If this code finds it can't get good compression, it puts an
+ * entry onto the work queue to write the uncompressed bytes.  This
+ * makes sure that both compressed inodes and uncompressed inodes
+ * are written in the same order that pdflush sent them down.
  */
-static int cow_file_range(struct inode *inode, struct page *locked_page,
-			  u64 start, u64 end, int *page_started)
+static noinline int compress_file_range(struct inode *inode,
+					struct page *locked_page,
+					u64 start, u64 end,
+					struct async_cow *async_cow,
+					int *num_added)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct btrfs_trans_handle *trans;
-	u64 alloc_hint = 0;
 	u64 num_bytes;
-	unsigned long ram_size;
 	u64 orig_start;
 	u64 disk_num_bytes;
-	u64 cur_alloc_size;
 	u64 blocksize = root->sectorsize;
 	u64 actual_end;
-	struct btrfs_key ins;
-	struct extent_map *em;
-	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
 	int ret = 0;
 	struct page **pages = NULL;
 	unsigned long nr_pages;
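
[The async_extent/async_cow structures above queue compression work so that phase two can replay it strictly in submission order. A userspace miniature of that embedded-list pattern follows; it is an illustrative sketch, not kernel code: list_node, the 128k extent size, and main() are assumptions standing in for list_head, the patch's 128k extent limit, and the worker threads.]

    #include <stdio.h>
    #include <stdlib.h>
    #include <stddef.h>

    struct list_node { struct list_node *next; };

    struct async_extent {
        unsigned long long start;
        unsigned long long ram_size;
        struct list_node list;    /* embedded node, like the kernel's list_head */
    };

    /* container_of in miniature: recover the struct from its embedded node */
    #define list_entry(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

    int main(void)
    {
        struct list_node *head = NULL, **tail = &head;

        /* phase one: queue extents in file order (compress_file_range's role) */
        for (int i = 0; i < 3; i++) {
            struct async_extent *ae = malloc(sizeof(*ae));
            ae->start = i * 131072ULL;
            ae->ram_size = 131072ULL;
            ae->list.next = NULL;
            *tail = &ae->list;
            tail = &ae->list.next;
        }

        /* phase two: drain in the same order (submit_compressed_extents' role) */
        while (head) {
            struct async_extent *ae = list_entry(head, struct async_extent, list);
            head = head->next;
            printf("submit extent at %llu\n", ae->start);
            free(ae);
        }
        return 0;
    }
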
@@ -298,22 +339,12 @@ static int cow_file_range(struct inode *inode, struct page *locked_page,
 	unsigned long total_compressed = 0;
 	unsigned long total_in = 0;
 	unsigned long max_compressed = 128 * 1024;
-	unsigned long max_uncompressed = 256 * 1024;
+	unsigned long max_uncompressed = 128 * 1024;
 	int i;
-	int ordered_type;
 	int will_compress;

-	trans = btrfs_join_transaction(root, 1);
-	BUG_ON(!trans);
-	btrfs_set_trans_block_group(trans, inode);
 	orig_start = start;

-	/*
-	 * compression made this loop a bit ugly, but the basic idea is to
-	 * compress some pages but keep the total size of the compressed
-	 * extent relatively small.  If compression is off, this goto target
-	 * is never used.
-	 */
again:
 	will_compress = 0;
 	nr_pages = (end >> PAGE_CACHE_SHIFT) - (start >> PAGE_CACHE_SHIFT) + 1;
@@ -324,7 +355,13 @@ again:

 	/* we want to make sure that amount of ram required to uncompress
 	 * an extent is reasonable, so we limit the total size in ram
-	 * of a compressed extent to 256k
+	 * of a compressed extent to 128k.  This is a crucial number
+	 * because it also controls how easily we can spread reads across
+	 * cpus for decompression.
+	 *
+	 * We also want to make sure the amount of IO required to do
+	 * a random read is reasonably small, so we limit the size of
+	 * a compressed extent to 128k.
 	 */
 	total_compressed = min(total_compressed, max_uncompressed);
 	num_bytes = (end - start + blocksize) & ~(blocksize - 1);
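
[The line just above rounds the byte count of the inclusive [start, end] range up to a multiple of the block size with a power-of-two mask: (end - start + blocksize) & ~(blocksize - 1). A standalone check of the arithmetic; the values are made up for illustration:]

    #include <stdio.h>

    int main(void)
    {
        unsigned long long blocksize = 4096;           /* must be a power of two */
        unsigned long long start = 0, end = 5000;      /* inclusive byte range */
        unsigned long long num_bytes =
            (end - start + blocksize) & ~(blocksize - 1);

        /* 5001 bytes round up to 8192 (two 4k blocks) */
        printf("%llu bytes -> %llu\n", end - start + 1, num_bytes);
        return 0;
    }
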
@@ -333,18 +370,16 @@ again: | |||
333 | total_in = 0; | 370 | total_in = 0; |
334 | ret = 0; | 371 | ret = 0; |
335 | 372 | ||
336 | /* we do compression for mount -o compress and when the | 373 | /* |
337 | * inode has not been flagged as nocompress | 374 | * we do compression for mount -o compress and when the |
375 | * inode has not been flagged as nocompress. This flag can | ||
376 | * change at any time if we discover bad compression ratios. | ||
338 | */ | 377 | */ |
339 | if (!btrfs_test_flag(inode, NOCOMPRESS) && | 378 | if (!btrfs_test_flag(inode, NOCOMPRESS) && |
340 | btrfs_test_opt(root, COMPRESS)) { | 379 | btrfs_test_opt(root, COMPRESS)) { |
341 | WARN_ON(pages); | 380 | WARN_ON(pages); |
342 | pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS); | 381 | pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS); |
343 | 382 | ||
344 | /* we want to make sure the amount of IO required to satisfy | ||
345 | * a random read is reasonably small, so we limit the size | ||
346 | * of a compressed extent to 128k | ||
347 | */ | ||
348 | ret = btrfs_zlib_compress_pages(inode->i_mapping, start, | 383 | ret = btrfs_zlib_compress_pages(inode->i_mapping, start, |
349 | total_compressed, pages, | 384 | total_compressed, pages, |
350 | nr_pages, &nr_pages_ret, | 385 | nr_pages, &nr_pages_ret, |
@@ -371,26 +406,34 @@ again: | |||
371 | } | 406 | } |
372 | } | 407 | } |
373 | if (start == 0) { | 408 | if (start == 0) { |
409 | trans = btrfs_join_transaction(root, 1); | ||
410 | BUG_ON(!trans); | ||
411 | btrfs_set_trans_block_group(trans, inode); | ||
412 | |||
374 | /* lets try to make an inline extent */ | 413 | /* lets try to make an inline extent */ |
375 | if (ret || total_in < (end - start + 1)) { | 414 | if (ret || total_in < (actual_end - start)) { |
376 | /* we didn't compress the entire range, try | 415 | /* we didn't compress the entire range, try |
377 | * to make an uncompressed inline extent. This | 416 | * to make an uncompressed inline extent. |
378 | * is almost sure to fail, but maybe inline sizes | ||
379 | * will get bigger later | ||
380 | */ | 417 | */ |
381 | ret = cow_file_range_inline(trans, root, inode, | 418 | ret = cow_file_range_inline(trans, root, inode, |
382 | start, end, 0, NULL); | 419 | start, end, 0, NULL); |
383 | } else { | 420 | } else { |
421 | /* try making a compressed inline extent */ | ||
384 | ret = cow_file_range_inline(trans, root, inode, | 422 | ret = cow_file_range_inline(trans, root, inode, |
385 | start, end, | 423 | start, end, |
386 | total_compressed, pages); | 424 | total_compressed, pages); |
387 | } | 425 | } |
426 | btrfs_end_transaction(trans, root); | ||
388 | if (ret == 0) { | 427 | if (ret == 0) { |
428 | /* | ||
429 | * inline extent creation worked, we don't need | ||
430 | * to create any more async work items. Unlock | ||
431 | * and free up our temp pages. | ||
432 | */ | ||
389 | extent_clear_unlock_delalloc(inode, | 433 | extent_clear_unlock_delalloc(inode, |
390 | &BTRFS_I(inode)->io_tree, | 434 | &BTRFS_I(inode)->io_tree, |
391 | start, end, NULL, | 435 | start, end, NULL, 1, 0, |
392 | 1, 1, 1); | 436 | 0, 1, 1, 1); |
393 | *page_started = 1; | ||
394 | ret = 0; | 437 | ret = 0; |
395 | goto free_pages_out; | 438 | goto free_pages_out; |
396 | } | 439 | } |
@@ -435,53 +478,280 @@ again: | |||
435 | /* flag the file so we don't compress in the future */ | 478 | /* flag the file so we don't compress in the future */ |
436 | btrfs_set_flag(inode, NOCOMPRESS); | 479 | btrfs_set_flag(inode, NOCOMPRESS); |
437 | } | 480 | } |
481 | if (will_compress) { | ||
482 | *num_added += 1; | ||
438 | 483 | ||
439 | BUG_ON(disk_num_bytes > | 484 | /* the async work queues will take care of doing actual |
440 | btrfs_super_total_bytes(&root->fs_info->super_copy)); | 485 | * allocation on disk for these compressed pages, |
486 | * and will submit them to the elevator. | ||
487 | */ | ||
488 | add_async_extent(async_cow, start, num_bytes, | ||
489 | total_compressed, pages, nr_pages_ret); | ||
441 | 490 | ||
442 | btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0); | 491 | if (start + num_bytes < end) { |
492 | start += num_bytes; | ||
493 | pages = NULL; | ||
494 | cond_resched(); | ||
495 | goto again; | ||
496 | } | ||
497 | } else { | ||
498 | /* | ||
499 | * No compression, but we still need to write the pages in | ||
500 | * the file we've been given so far. redirty the locked | ||
501 | * page if it corresponds to our extent and set things up | ||
502 | * for the async work queue to run cow_file_range to do | ||
503 | * the normal delalloc dance | ||
504 | */ | ||
505 | if (page_offset(locked_page) >= start && | ||
506 | page_offset(locked_page) <= end) { | ||
507 | __set_page_dirty_nobuffers(locked_page); | ||
508 | /* unlocked later on in the async handlers */ | ||
509 | } | ||
510 | add_async_extent(async_cow, start, end - start + 1, 0, NULL, 0); | ||
511 | *num_added += 1; | ||
512 | } | ||
443 | 513 | ||
444 | while(disk_num_bytes > 0) { | 514 | out: |
445 | unsigned long min_bytes; | 515 | return 0; |
516 | |||
517 | free_pages_out: | ||
518 | for (i = 0; i < nr_pages_ret; i++) { | ||
519 | WARN_ON(pages[i]->mapping); | ||
520 | page_cache_release(pages[i]); | ||
521 | } | ||
522 | if (pages) | ||
523 | kfree(pages); | ||
524 | |||
525 | goto out; | ||
526 | } | ||
527 | |||
528 | /* | ||
529 | * phase two of compressed writeback. This is the ordered portion | ||
530 | * of the code, which only gets called in the order the work was | ||
531 | * queued. We walk all the async extents created by compress_file_range | ||
532 | * and send them down to the disk. | ||
533 | */ | ||
534 | static noinline int submit_compressed_extents(struct inode *inode, | ||
535 | struct async_cow *async_cow) | ||
536 | { | ||
537 | struct async_extent *async_extent; | ||
538 | u64 alloc_hint = 0; | ||
539 | struct btrfs_trans_handle *trans; | ||
540 | struct btrfs_key ins; | ||
541 | struct extent_map *em; | ||
542 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
543 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | ||
544 | struct extent_io_tree *io_tree; | ||
545 | int ret; | ||
546 | |||
547 | if (list_empty(&async_cow->extents)) | ||
548 | return 0; | ||
549 | |||
550 | trans = btrfs_join_transaction(root, 1); | ||
551 | |||
552 | while(!list_empty(&async_cow->extents)) { | ||
553 | async_extent = list_entry(async_cow->extents.next, | ||
554 | struct async_extent, list); | ||
555 | list_del(&async_extent->list); | ||
446 | 556 | ||
557 | io_tree = &BTRFS_I(inode)->io_tree; | ||
558 | |||
559 | /* did the compression code fall back to uncompressed IO? */ | ||
560 | if (!async_extent->pages) { | ||
561 | int page_started = 0; | ||
562 | unsigned long nr_written = 0; | ||
563 | |||
564 | lock_extent(io_tree, async_extent->start, | ||
565 | async_extent->start + async_extent->ram_size - 1, | ||
566 | GFP_NOFS); | ||
567 | |||
568 | /* allocate blocks */ | ||
569 | cow_file_range(inode, async_cow->locked_page, | ||
570 | async_extent->start, | ||
571 | async_extent->start + | ||
572 | async_extent->ram_size - 1, | ||
573 | &page_started, &nr_written, 0); | ||
574 | |||
575 | /* | ||
576 | * if page_started, cow_file_range inserted an | ||
577 | * inline extent and took care of all the unlocking | ||
578 | * and IO for us. Otherwise, we need to submit | ||
579 | * all those pages down to the drive. | ||
580 | */ | ||
581 | if (!page_started) | ||
582 | extent_write_locked_range(io_tree, | ||
583 | inode, async_extent->start, | ||
584 | async_extent->start + | ||
585 | async_extent->ram_size - 1, | ||
586 | btrfs_get_extent, | ||
587 | WB_SYNC_ALL); | ||
588 | kfree(async_extent); | ||
589 | cond_resched(); | ||
590 | continue; | ||
591 | } | ||
592 | |||
593 | lock_extent(io_tree, async_extent->start, | ||
594 | async_extent->start + async_extent->ram_size - 1, | ||
595 | GFP_NOFS); | ||
447 | /* | 596 | /* |
448 | * the max size of a compressed extent is pretty small, | 597 | * here we're doing allocation and writeback of the |
449 | * make the code a little less complex by forcing | 598 | * compressed pages |
450 | * the allocator to find a whole compressed extent at once | ||
451 | */ | 599 | */ |
452 | if (will_compress) | 600 | btrfs_drop_extent_cache(inode, async_extent->start, |
453 | min_bytes = disk_num_bytes; | 601 | async_extent->start + |
454 | else | 602 | async_extent->ram_size - 1, 0); |
455 | min_bytes = root->sectorsize; | 603 | |
604 | ret = btrfs_reserve_extent(trans, root, | ||
605 | async_extent->compressed_size, | ||
606 | async_extent->compressed_size, | ||
607 | 0, alloc_hint, | ||
608 | (u64)-1, &ins, 1); | ||
609 | BUG_ON(ret); | ||
610 | em = alloc_extent_map(GFP_NOFS); | ||
611 | em->start = async_extent->start; | ||
612 | em->len = async_extent->ram_size; | ||
613 | |||
614 | em->block_start = ins.objectid; | ||
615 | em->block_len = ins.offset; | ||
616 | em->bdev = root->fs_info->fs_devices->latest_bdev; | ||
617 | set_bit(EXTENT_FLAG_PINNED, &em->flags); | ||
618 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); | ||
619 | |||
620 | while(1) { | ||
621 | spin_lock(&em_tree->lock); | ||
622 | ret = add_extent_mapping(em_tree, em); | ||
623 | spin_unlock(&em_tree->lock); | ||
624 | if (ret != -EEXIST) { | ||
625 | free_extent_map(em); | ||
626 | break; | ||
627 | } | ||
628 | btrfs_drop_extent_cache(inode, async_extent->start, | ||
629 | async_extent->start + | ||
630 | async_extent->ram_size - 1, 0); | ||
631 | } | ||
632 | |||
633 | ret = btrfs_add_ordered_extent(inode, async_extent->start, | ||
634 | ins.objectid, | ||
635 | async_extent->ram_size, | ||
636 | ins.offset, | ||
637 | BTRFS_ORDERED_COMPRESSED); | ||
638 | BUG_ON(ret); | ||
639 | |||
640 | btrfs_end_transaction(trans, root); | ||
641 | |||
642 | /* | ||
643 | * clear dirty, set writeback and unlock the pages. | ||
644 | */ | ||
645 | extent_clear_unlock_delalloc(inode, | ||
646 | &BTRFS_I(inode)->io_tree, | ||
647 | async_extent->start, | ||
648 | async_extent->start + | ||
649 | async_extent->ram_size - 1, | ||
650 | NULL, 1, 1, 0, 1, 1, 0); | ||
651 | |||
652 | ret = btrfs_submit_compressed_write(inode, | ||
653 | async_extent->start, | ||
654 | async_extent->ram_size, | ||
655 | ins.objectid, | ||
656 | ins.offset, async_extent->pages, | ||
657 | async_extent->nr_pages); | ||
658 | |||
659 | BUG_ON(ret); | ||
660 | trans = btrfs_join_transaction(root, 1); | ||
661 | alloc_hint = ins.objectid + ins.offset; | ||
662 | kfree(async_extent); | ||
663 | cond_resched(); | ||
664 | } | ||
665 | |||
666 | btrfs_end_transaction(trans, root); | ||
667 | return 0; | ||
668 | } | ||
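
The while(1) loop above that pairs add_extent_mapping() with btrfs_drop_extent_cache() on -EEXIST is a small evict-and-retry idiom: the freshly allocated, pinned mapping has to win over any stale cached mapping for the same byte range. A minimal userspace sketch of the same idiom; cache_insert() and cache_drop_range() are hypothetical stand-ins, not btrfs functions.

#include <errno.h>
#include <stdio.h>

static int tries;

/* Hypothetical stand-in for add_extent_mapping(): fails once with -EEXIST. */
static int cache_insert(unsigned long long start, unsigned long long len)
{
        return tries++ == 0 ? -EEXIST : 0;
}

/* Hypothetical stand-in for btrfs_drop_extent_cache(). */
static void cache_drop_range(unsigned long long start, unsigned long long len)
{
        printf("dropping stale mapping for [%llu, %llu)\n", start, start + len);
}

/* Keep evicting whatever overlaps until the new mapping goes in. */
static void pin_new_mapping(unsigned long long start, unsigned long long len)
{
        while (1) {
                if (cache_insert(start, len) != -EEXIST)
                        break;
                cache_drop_range(start, len);
        }
}

int main(void)
{
        pin_new_mapping(0, 4096);
        return 0;
}
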
669 | |||
670 | /* | ||
671 | * when extent_io.c finds a delayed allocation range in the file, | ||
671 | * the callbacks end up in this code. The basic idea is to | ||
672 | * allocate extents on disk for the range, and create ordered data structs | ||
673 | * in RAM to track those extents. | ||
675 | * | ||
676 | * locked_page is the page that writepage had locked already. We use | ||
677 | * it to make sure we don't do extra locks or unlocks. | ||
678 | * | ||
679 | * *page_started is set to one if we unlock locked_page and do everything | ||
680 | * required to start IO on it. It may be clean and already done with | ||
681 | * IO when we return. | ||
682 | */ | ||
683 | static noinline int cow_file_range(struct inode *inode, | ||
684 | struct page *locked_page, | ||
685 | u64 start, u64 end, int *page_started, | ||
686 | unsigned long *nr_written, | ||
687 | int unlock) | ||
688 | { | ||
689 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
690 | struct btrfs_trans_handle *trans; | ||
691 | u64 alloc_hint = 0; | ||
692 | u64 num_bytes; | ||
693 | unsigned long ram_size; | ||
694 | u64 disk_num_bytes; | ||
695 | u64 cur_alloc_size; | ||
696 | u64 blocksize = root->sectorsize; | ||
697 | u64 actual_end; | ||
698 | struct btrfs_key ins; | ||
699 | struct extent_map *em; | ||
700 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | ||
701 | int ret = 0; | ||
702 | |||
703 | trans = btrfs_join_transaction(root, 1); | ||
704 | BUG_ON(!trans); | ||
705 | btrfs_set_trans_block_group(trans, inode); | ||
456 | 706 | ||
707 | actual_end = min_t(u64, i_size_read(inode), end + 1); | ||
708 | |||
709 | num_bytes = (end - start + blocksize) & ~(blocksize - 1); | ||
710 | num_bytes = max(blocksize, num_bytes); | ||
711 | disk_num_bytes = num_bytes; | ||
712 | ret = 0; | ||
713 | |||
714 | if (start == 0) { | ||
715 | /* lets try to make an inline extent */ | ||
716 | ret = cow_file_range_inline(trans, root, inode, | ||
717 | start, end, 0, NULL); | ||
718 | if (ret == 0) { | ||
719 | extent_clear_unlock_delalloc(inode, | ||
720 | &BTRFS_I(inode)->io_tree, | ||
721 | start, end, NULL, 1, 1, | ||
722 | 1, 1, 1, 1); | ||
723 | *nr_written = *nr_written + | ||
724 | (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE; | ||
725 | *page_started = 1; | ||
726 | ret = 0; | ||
727 | goto out; | ||
728 | } | ||
729 | } | ||
730 | |||
731 | BUG_ON(disk_num_bytes > | ||
732 | btrfs_super_total_bytes(&root->fs_info->super_copy)); | ||
733 | |||
734 | btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0); | ||
735 | |||
736 | while(disk_num_bytes > 0) { | ||
457 | cur_alloc_size = min(disk_num_bytes, root->fs_info->max_extent); | 737 | cur_alloc_size = min(disk_num_bytes, root->fs_info->max_extent); |
458 | ret = btrfs_reserve_extent(trans, root, cur_alloc_size, | 738 | ret = btrfs_reserve_extent(trans, root, cur_alloc_size, |
459 | min_bytes, 0, alloc_hint, | 739 | root->sectorsize, 0, alloc_hint, |
460 | (u64)-1, &ins, 1); | 740 | (u64)-1, &ins, 1); |
461 | if (ret) { | 741 | if (ret) { |
462 | WARN_ON(1); | 742 | BUG(); |
463 | goto free_pages_out_fail; | ||
464 | } | 743 | } |
465 | em = alloc_extent_map(GFP_NOFS); | 744 | em = alloc_extent_map(GFP_NOFS); |
466 | em->start = start; | 745 | em->start = start; |
467 | 746 | ||
468 | if (will_compress) { | 747 | ram_size = ins.offset; |
469 | ram_size = num_bytes; | 748 | em->len = ins.offset; |
470 | em->len = num_bytes; | ||
471 | } else { | ||
472 | /* ramsize == disk size */ | ||
473 | ram_size = ins.offset; | ||
474 | em->len = ins.offset; | ||
475 | } | ||
476 | 749 | ||
477 | em->block_start = ins.objectid; | 750 | em->block_start = ins.objectid; |
478 | em->block_len = ins.offset; | 751 | em->block_len = ins.offset; |
479 | em->bdev = root->fs_info->fs_devices->latest_bdev; | 752 | em->bdev = root->fs_info->fs_devices->latest_bdev; |
480 | set_bit(EXTENT_FLAG_PINNED, &em->flags); | 753 | set_bit(EXTENT_FLAG_PINNED, &em->flags); |
481 | 754 | ||
482 | if (will_compress) | ||
483 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); | ||
484 | |||
485 | while(1) { | 755 | while(1) { |
486 | spin_lock(&em_tree->lock); | 756 | spin_lock(&em_tree->lock); |
487 | ret = add_extent_mapping(em_tree, em); | 757 | ret = add_extent_mapping(em_tree, em); |
@@ -495,10 +765,8 @@ again: | |||
495 | } | 765 | } |
496 | 766 | ||
497 | cur_alloc_size = ins.offset; | 767 | cur_alloc_size = ins.offset; |
498 | ordered_type = will_compress ? BTRFS_ORDERED_COMPRESSED : 0; | ||
499 | ret = btrfs_add_ordered_extent(inode, start, ins.objectid, | 768 | ret = btrfs_add_ordered_extent(inode, start, ins.objectid, |
500 | ram_size, cur_alloc_size, | 769 | ram_size, cur_alloc_size, 0); |
501 | ordered_type); | ||
502 | BUG_ON(ret); | 770 | BUG_ON(ret); |
503 | 771 | ||
504 | if (disk_num_bytes < cur_alloc_size) { | 772 | if (disk_num_bytes < cur_alloc_size) { |
@@ -506,82 +774,145 @@ again: | |||
506 | cur_alloc_size); | 774 | cur_alloc_size); |
507 | break; | 775 | break; |
508 | } | 776 | } |
509 | |||
510 | if (will_compress) { | ||
511 | /* | ||
512 | * we're doing compression, we and we need to | ||
513 | * submit the compressed extents down to the device. | ||
514 | * | ||
515 | * We lock down all the file pages, clearing their | ||
516 | * dirty bits and setting them writeback. Everyone | ||
517 | * that wants to modify the page will wait on the | ||
518 | * ordered extent above. | ||
519 | * | ||
520 | * The writeback bits on the file pages are | ||
521 | * cleared when the compressed pages are on disk | ||
522 | */ | ||
523 | btrfs_end_transaction(trans, root); | ||
524 | |||
525 | if (start <= page_offset(locked_page) && | ||
526 | page_offset(locked_page) < start + ram_size) { | ||
527 | *page_started = 1; | ||
528 | } | ||
529 | |||
530 | extent_clear_unlock_delalloc(inode, | ||
531 | &BTRFS_I(inode)->io_tree, | ||
532 | start, | ||
533 | start + ram_size - 1, | ||
534 | NULL, 1, 1, 0); | ||
535 | |||
536 | ret = btrfs_submit_compressed_write(inode, start, | ||
537 | ram_size, ins.objectid, | ||
538 | cur_alloc_size, pages, | ||
539 | nr_pages_ret); | ||
540 | |||
541 | BUG_ON(ret); | ||
542 | trans = btrfs_join_transaction(root, 1); | ||
543 | if (start + ram_size < end) { | ||
544 | start += ram_size; | ||
545 | alloc_hint = ins.objectid + ins.offset; | ||
546 | /* pages will be freed at end_bio time */ | ||
547 | pages = NULL; | ||
548 | goto again; | ||
549 | } else { | ||
550 | /* we've written everything, time to go */ | ||
551 | break; | ||
552 | } | ||
553 | } | ||
554 | /* we're not doing compressed IO, don't unlock the first | 777 | /* we're not doing compressed IO, don't unlock the first |
555 | * page (which the caller expects to stay locked), don't | 778 | * page (which the caller expects to stay locked), don't |
556 | * clear any dirty bits and don't set any writeback bits | 779 | * clear any dirty bits and don't set any writeback bits |
557 | */ | 780 | */ |
558 | extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, | 781 | extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, |
559 | start, start + ram_size - 1, | 782 | start, start + ram_size - 1, |
560 | locked_page, 0, 0, 0); | 783 | locked_page, unlock, 1, |
784 | 1, 0, 0, 0); | ||
561 | disk_num_bytes -= cur_alloc_size; | 785 | disk_num_bytes -= cur_alloc_size; |
562 | num_bytes -= cur_alloc_size; | 786 | num_bytes -= cur_alloc_size; |
563 | alloc_hint = ins.objectid + ins.offset; | 787 | alloc_hint = ins.objectid + ins.offset; |
564 | start += cur_alloc_size; | 788 | start += cur_alloc_size; |
565 | } | 789 | } |
566 | |||
567 | ret = 0; | ||
568 | out: | 790 | out: |
791 | ret = 0; | ||
569 | btrfs_end_transaction(trans, root); | 792 | btrfs_end_transaction(trans, root); |
570 | 793 | ||
571 | return ret; | 794 | return ret; |
795 | } | ||
572 | 796 | ||
573 | free_pages_out_fail: | 797 | /* |
574 | extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, | 798 | * work queue callback to start compression on a file and pages |
575 | start, end, locked_page, 0, 0, 0); | 799 | */ |
576 | free_pages_out: | 800 | static noinline void async_cow_start(struct btrfs_work *work) |
577 | for (i = 0; i < nr_pages_ret; i++) { | 801 | { |
578 | WARN_ON(pages[i]->mapping); | 802 | struct async_cow *async_cow; |
579 | page_cache_release(pages[i]); | 803 | int num_added = 0; |
804 | async_cow = container_of(work, struct async_cow, work); | ||
805 | |||
806 | compress_file_range(async_cow->inode, async_cow->locked_page, | ||
807 | async_cow->start, async_cow->end, async_cow, | ||
808 | &num_added); | ||
809 | if (num_added == 0) | ||
810 | async_cow->inode = NULL; | ||
811 | } | ||
812 | |||
813 | /* | ||
814 | * work queue callback to submit previously compressed pages | ||
815 | */ | ||
816 | static noinline void async_cow_submit(struct btrfs_work *work) | ||
817 | { | ||
818 | struct async_cow *async_cow; | ||
819 | struct btrfs_root *root; | ||
820 | unsigned long nr_pages; | ||
821 | |||
822 | async_cow = container_of(work, struct async_cow, work); | ||
823 | |||
824 | root = async_cow->root; | ||
825 | nr_pages = (async_cow->end - async_cow->start + PAGE_CACHE_SIZE) >> | ||
826 | PAGE_CACHE_SHIFT; | ||
827 | |||
828 | atomic_sub(nr_pages, &root->fs_info->async_delalloc_pages); | ||
829 | |||
830 | if (atomic_read(&root->fs_info->async_delalloc_pages) < | ||
831 | 5 * 1024 * 1024 && | ||
832 | waitqueue_active(&root->fs_info->async_submit_wait)) | ||
833 | wake_up(&root->fs_info->async_submit_wait); | ||
834 | |||
835 | if (async_cow->inode) { | ||
836 | submit_compressed_extents(async_cow->inode, async_cow); | ||
580 | } | 837 | } |
581 | if (pages) | 838 | } |
582 | kfree(pages); | ||
583 | 839 | ||
584 | goto out; | 840 | static noinline void async_cow_free(struct btrfs_work *work) |
841 | { | ||
842 | struct async_cow *async_cow; | ||
843 | async_cow = container_of(work, struct async_cow, work); | ||
844 | kfree(async_cow); | ||
845 | } | ||
846 | |||
847 | static int cow_file_range_async(struct inode *inode, struct page *locked_page, | ||
848 | u64 start, u64 end, int *page_started, | ||
849 | unsigned long *nr_written) | ||
850 | { | ||
851 | struct async_cow *async_cow; | ||
852 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
853 | unsigned long nr_pages; | ||
854 | u64 cur_end; | ||
855 | int limit = 10 * 1024 * 1024; | ||
856 | |||
857 | if (!btrfs_test_opt(root, COMPRESS)) { | ||
858 | return cow_file_range(inode, locked_page, start, end, | ||
859 | page_started, nr_written, 1); | ||
860 | } | ||
861 | |||
862 | clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED | | ||
863 | EXTENT_DELALLOC, 1, 0, GFP_NOFS); | ||
864 | while(start < end) { | ||
865 | async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS); | ||
866 | async_cow->inode = inode; | ||
867 | async_cow->root = root; | ||
868 | async_cow->locked_page = locked_page; | ||
869 | async_cow->start = start; | ||
870 | |||
871 | if (btrfs_test_flag(inode, NOCOMPRESS)) | ||
872 | cur_end = end; | ||
873 | else | ||
874 | cur_end = min(end, start + 512 * 1024 - 1); | ||
875 | |||
876 | async_cow->end = cur_end; | ||
877 | INIT_LIST_HEAD(&async_cow->extents); | ||
878 | |||
879 | async_cow->work.func = async_cow_start; | ||
880 | async_cow->work.ordered_func = async_cow_submit; | ||
881 | async_cow->work.ordered_free = async_cow_free; | ||
882 | async_cow->work.flags = 0; | ||
883 | |||
884 | while(atomic_read(&root->fs_info->async_submit_draining) && | ||
885 | atomic_read(&root->fs_info->async_delalloc_pages)) { | ||
886 | wait_event(root->fs_info->async_submit_wait, | ||
887 | (atomic_read(&root->fs_info->async_delalloc_pages) | ||
888 | == 0)); | ||
889 | } | ||
890 | |||
891 | nr_pages = (cur_end - start + PAGE_CACHE_SIZE) >> | ||
892 | PAGE_CACHE_SHIFT; | ||
893 | atomic_add(nr_pages, &root->fs_info->async_delalloc_pages); | ||
894 | |||
895 | btrfs_queue_worker(&root->fs_info->delalloc_workers, | ||
896 | &async_cow->work); | ||
897 | |||
898 | if (atomic_read(&root->fs_info->async_delalloc_pages) > limit) { | ||
899 | wait_event(root->fs_info->async_submit_wait, | ||
900 | (atomic_read(&root->fs_info->async_delalloc_pages) < | ||
901 | limit)); | ||
902 | } | ||
903 | |||
904 | while(atomic_read(&root->fs_info->async_submit_draining) && | ||
905 | atomic_read(&root->fs_info->async_delalloc_pages)) { | ||
906 | wait_event(root->fs_info->async_submit_wait, | ||
907 | (atomic_read(&root->fs_info->async_delalloc_pages) == | ||
908 | 0)); | ||
909 | } | ||
910 | |||
911 | *nr_written += nr_pages; | ||
912 | start = cur_end + 1; | ||
913 | } | ||
914 | *page_started = 1; | ||
915 | return 0; | ||
585 | } | 916 | } |
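
cow_file_range_async throttles itself: it adds each chunk's page count to async_delalloc_pages before queueing the worker, then sleeps on async_submit_wait once more than `limit` pages are in flight, relying on async_cow_submit to subtract and wake. A rough userspace analogue of that backpressure using pthreads; every name below is invented for illustration and nothing here is btrfs API.

#include <pthread.h>
#include <stddef.h>
#include <stdio.h>

#define LIMIT 1024                      /* pages in flight, like the 10MB cap */

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t drained = PTHREAD_COND_INITIALIZER;
static long in_flight;

/* Producer: account for the queued pages, then block while over the limit. */
static void queue_pages(long nr_pages)
{
        pthread_mutex_lock(&lock);
        in_flight += nr_pages;
        while (in_flight > LIMIT)
                pthread_cond_wait(&drained, &lock);
        pthread_mutex_unlock(&lock);
}

/* Worker completion: mirrors async_cow_submit dropping the count. */
static void pages_done(long nr_pages)
{
        pthread_mutex_lock(&lock);
        in_flight -= nr_pages;
        pthread_cond_broadcast(&drained);
        pthread_mutex_unlock(&lock);
}

static void *worker(void *arg)
{
        pages_done((long)(size_t)arg);
        return NULL;
}

int main(void)
{
        pthread_t t;

        queue_pages(LIMIT / 2);         /* under the limit: returns at once */
        pthread_create(&t, NULL, worker, (void *)(size_t)(LIMIT / 2));
        pthread_join(t, NULL);
        printf("in flight: %ld\n", in_flight);
        return 0;
}
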
586 | 917 | ||
587 | /* | 918 | /* |
@@ -592,7 +923,8 @@ free_pages_out: | |||
592 | * blocks on disk | 923 | * blocks on disk |
593 | */ | 924 | */ |
594 | static int run_delalloc_nocow(struct inode *inode, struct page *locked_page, | 925 | static int run_delalloc_nocow(struct inode *inode, struct page *locked_page, |
595 | u64 start, u64 end, int *page_started, int force) | 926 | u64 start, u64 end, int *page_started, int force, |
927 | unsigned long *nr_written) | ||
596 | { | 928 | { |
597 | struct btrfs_root *root = BTRFS_I(inode)->root; | 929 | struct btrfs_root *root = BTRFS_I(inode)->root; |
598 | struct btrfs_trans_handle *trans; | 930 | struct btrfs_trans_handle *trans; |
@@ -711,7 +1043,8 @@ out_check: | |||
711 | btrfs_release_path(root, path); | 1043 | btrfs_release_path(root, path); |
712 | if (cow_start != (u64)-1) { | 1044 | if (cow_start != (u64)-1) { |
713 | ret = cow_file_range(inode, locked_page, cow_start, | 1045 | ret = cow_file_range(inode, locked_page, cow_start, |
714 | found_key.offset - 1, page_started); | 1046 | found_key.offset - 1, page_started, |
1047 | nr_written, 1); | ||
715 | BUG_ON(ret); | 1048 | BUG_ON(ret); |
716 | cow_start = (u64)-1; | 1049 | cow_start = (u64)-1; |
717 | } | 1050 | } |
@@ -748,9 +1081,10 @@ out_check: | |||
748 | ret = btrfs_add_ordered_extent(inode, cur_offset, disk_bytenr, | 1081 | ret = btrfs_add_ordered_extent(inode, cur_offset, disk_bytenr, |
749 | num_bytes, num_bytes, type); | 1082 | num_bytes, num_bytes, type); |
750 | BUG_ON(ret); | 1083 | BUG_ON(ret); |
1084 | |||
751 | extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, | 1085 | extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, |
752 | cur_offset, cur_offset + num_bytes - 1, | 1086 | cur_offset, cur_offset + num_bytes - 1, |
753 | locked_page, 0, 0, 0); | 1087 | locked_page, 1, 1, 1, 0, 0, 0); |
754 | cur_offset = extent_end; | 1088 | cur_offset = extent_end; |
755 | if (cur_offset > end) | 1089 | if (cur_offset > end) |
756 | break; | 1090 | break; |
@@ -761,7 +1095,7 @@ out_check: | |||
761 | cow_start = cur_offset; | 1095 | cow_start = cur_offset; |
762 | if (cow_start != (u64)-1) { | 1096 | if (cow_start != (u64)-1) { |
763 | ret = cow_file_range(inode, locked_page, cow_start, end, | 1097 | ret = cow_file_range(inode, locked_page, cow_start, end, |
764 | page_started); | 1098 | page_started, nr_written, 1); |
765 | BUG_ON(ret); | 1099 | BUG_ON(ret); |
766 | } | 1100 | } |
767 | 1101 | ||
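
The two cow_file_range() calls in this hunk implement a flush-on-break pattern: run_delalloc_nocow accumulates a pending COW span in cow_start and flushes it whenever a nocow-able extent interrupts the span, plus once more at the end of the walk. A condensed sketch of that control flow; cow_span() and the flat arrays are illustrative stand-ins for the real extent tree walk.

#include <stdio.h>

#define NO_PENDING ((unsigned long long)-1)

/* Hypothetical stand-in for cow_file_range(): COW the inclusive span. */
static void cow_span(unsigned long long start, unsigned long long end)
{
        printf("COW [%llu, %llu]\n", start, end);
}

/*
 * Walk a run of extents; nocow-able ones are skipped, and any
 * accumulated COW span is flushed the moment the run is broken.
 */
static void scan(const int *nocow, const unsigned long long *ext_start,
                 const unsigned long long *ext_end, int n)
{
        unsigned long long cow_start = NO_PENDING;
        int i;

        for (i = 0; i < n; i++) {
                if (nocow[i]) {
                        if (cow_start != NO_PENDING) {
                                cow_span(cow_start, ext_start[i] - 1);
                                cow_start = NO_PENDING;
                        }
                        continue;
                }
                if (cow_start == NO_PENDING)
                        cow_start = ext_start[i];
        }
        if (cow_start != NO_PENDING)
                cow_span(cow_start, ext_end[n - 1]);
}

int main(void)
{
        int nocow[] = { 0, 1, 0 };
        unsigned long long s[] = { 0, 4096, 8192 };
        unsigned long long e[] = { 4095, 8191, 12287 };

        scan(nocow, s, e, 3);
        return 0;
}
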
@@ -775,7 +1109,8 @@ out_check: | |||
775 | * extent_io.c call back to do delayed allocation processing | 1109 | * extent_io.c call back to do delayed allocation processing |
776 | */ | 1110 | */ |
777 | static int run_delalloc_range(struct inode *inode, struct page *locked_page, | 1111 | static int run_delalloc_range(struct inode *inode, struct page *locked_page, |
778 | u64 start, u64 end, int *page_started) | 1112 | u64 start, u64 end, int *page_started, |
1113 | unsigned long *nr_written) | ||
779 | { | 1114 | { |
780 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1115 | struct btrfs_root *root = BTRFS_I(inode)->root; |
781 | int ret; | 1116 | int ret; |
@@ -783,13 +1118,13 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, | |||
783 | if (btrfs_test_opt(root, NODATACOW) || | 1118 | if (btrfs_test_opt(root, NODATACOW) || |
784 | btrfs_test_flag(inode, NODATACOW)) | 1119 | btrfs_test_flag(inode, NODATACOW)) |
785 | ret = run_delalloc_nocow(inode, locked_page, start, end, | 1120 | ret = run_delalloc_nocow(inode, locked_page, start, end, |
786 | page_started, 0); | 1121 | page_started, 0, nr_written); |
787 | else if (btrfs_test_flag(inode, PREALLOC)) | 1122 | else if (btrfs_test_flag(inode, PREALLOC)) |
788 | ret = run_delalloc_nocow(inode, locked_page, start, end, | 1123 | ret = run_delalloc_nocow(inode, locked_page, start, end, |
789 | page_started, 1); | 1124 | page_started, 1, nr_written); |
790 | else | 1125 | else |
791 | ret = cow_file_range(inode, locked_page, start, end, | 1126 | ret = cow_file_range_async(inode, locked_page, start, end, |
792 | page_started); | 1127 | page_started, nr_written); |
793 | 1128 | ||
794 | return ret; | 1129 | return ret; |
795 | } | 1130 | } |
@@ -861,6 +1196,9 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset, | |||
861 | u64 map_length; | 1196 | u64 map_length; |
862 | int ret; | 1197 | int ret; |
863 | 1198 | ||
1199 | if (bio_flags & EXTENT_BIO_COMPRESSED) | ||
1200 | return 0; | ||
1201 | |||
864 | length = bio->bi_size; | 1202 | length = bio->bi_size; |
865 | map_tree = &root->fs_info->mapping_tree; | 1203 | map_tree = &root->fs_info->mapping_tree; |
866 | map_length = length; | 1204 | map_length = length; |
@@ -925,12 +1263,12 @@ int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | |||
925 | btrfs_test_flag(inode, NODATASUM); | 1263 | btrfs_test_flag(inode, NODATASUM); |
926 | 1264 | ||
927 | if (!(rw & (1 << BIO_RW))) { | 1265 | if (!(rw & (1 << BIO_RW))) { |
928 | if (!skip_sum) | ||
929 | btrfs_lookup_bio_sums(root, inode, bio); | ||
930 | 1266 | ||
931 | if (bio_flags & EXTENT_BIO_COMPRESSED) | 1267 | if (bio_flags & EXTENT_BIO_COMPRESSED) |
932 | return btrfs_submit_compressed_read(inode, bio, | 1268 | return btrfs_submit_compressed_read(inode, bio, |
933 | mirror_num, bio_flags); | 1269 | mirror_num, bio_flags); |
1270 | else if (!skip_sum) | ||
1271 | btrfs_lookup_bio_sums(root, inode, bio); | ||
934 | goto mapit; | 1272 | goto mapit; |
935 | } else if (!skip_sum) { | 1273 | } else if (!skip_sum) { |
936 | /* we're doing a write, do the async checksumming */ | 1274 | /* we're doing a write, do the async checksumming */ |
@@ -966,6 +1304,9 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans, | |||
966 | 1304 | ||
967 | int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end) | 1305 | int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end) |
968 | { | 1306 | { |
1307 | if ((end & (PAGE_CACHE_SIZE - 1)) == 0) { | ||
1308 | WARN_ON(1); | ||
1309 | } | ||
969 | return set_extent_delalloc(&BTRFS_I(inode)->io_tree, start, end, | 1310 | return set_extent_delalloc(&BTRFS_I(inode)->io_tree, start, end, |
970 | GFP_NOFS); | 1311 | GFP_NOFS); |
971 | } | 1312 | } |
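
The WARN_ON added here guards a convention rather than computing anything: delalloc ranges use inclusive end offsets, so a well-formed `end` is the last byte of a page and (end + 1) is page aligned; an `end` that is itself page aligned signals an off-by-one in the caller. A tiny sketch of that invariant, assuming the usual 4K PAGE_CACHE_SIZE:

#include <assert.h>

#define PAGE_CACHE_SIZE 4096UL

/* Inclusive-end range: [start, end], with end on the last byte of a page. */
static int delalloc_range_ok(unsigned long long start, unsigned long long end)
{
        return start <= end && ((end + 1) & (PAGE_CACHE_SIZE - 1)) == 0;
}

int main(void)
{
        assert(delalloc_range_ok(0, 4095));     /* exactly one page */
        assert(!delalloc_range_ok(0, 4096));    /* end landed on a boundary */
        return 0;
}
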
@@ -2105,6 +2446,7 @@ noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, | |||
2105 | int pending_del_nr = 0; | 2446 | int pending_del_nr = 0; |
2106 | int pending_del_slot = 0; | 2447 | int pending_del_slot = 0; |
2107 | int extent_type = -1; | 2448 | int extent_type = -1; |
2449 | int encoding; | ||
2108 | u64 mask = root->sectorsize - 1; | 2450 | u64 mask = root->sectorsize - 1; |
2109 | 2451 | ||
2110 | if (root->ref_cows) | 2452 | if (root->ref_cows) |
@@ -2144,6 +2486,7 @@ search_again: | |||
2144 | leaf = path->nodes[0]; | 2486 | leaf = path->nodes[0]; |
2145 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); | 2487 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); |
2146 | found_type = btrfs_key_type(&found_key); | 2488 | found_type = btrfs_key_type(&found_key); |
2489 | encoding = 0; | ||
2147 | 2490 | ||
2148 | if (found_key.objectid != inode->i_ino) | 2491 | if (found_key.objectid != inode->i_ino) |
2149 | break; | 2492 | break; |
@@ -2156,6 +2499,10 @@ search_again: | |||
2156 | fi = btrfs_item_ptr(leaf, path->slots[0], | 2499 | fi = btrfs_item_ptr(leaf, path->slots[0], |
2157 | struct btrfs_file_extent_item); | 2500 | struct btrfs_file_extent_item); |
2158 | extent_type = btrfs_file_extent_type(leaf, fi); | 2501 | extent_type = btrfs_file_extent_type(leaf, fi); |
2502 | encoding = btrfs_file_extent_compression(leaf, fi); | ||
2503 | encoding |= btrfs_file_extent_encryption(leaf, fi); | ||
2504 | encoding |= btrfs_file_extent_other_encoding(leaf, fi); | ||
2505 | |||
2159 | if (extent_type != BTRFS_FILE_EXTENT_INLINE) { | 2506 | if (extent_type != BTRFS_FILE_EXTENT_INLINE) { |
2160 | item_end += | 2507 | item_end += |
2161 | btrfs_file_extent_num_bytes(leaf, fi); | 2508 | btrfs_file_extent_num_bytes(leaf, fi); |
@@ -2200,7 +2547,7 @@ search_again: | |||
2200 | if (extent_type != BTRFS_FILE_EXTENT_INLINE) { | 2547 | if (extent_type != BTRFS_FILE_EXTENT_INLINE) { |
2201 | u64 num_dec; | 2548 | u64 num_dec; |
2202 | extent_start = btrfs_file_extent_disk_bytenr(leaf, fi); | 2549 | extent_start = btrfs_file_extent_disk_bytenr(leaf, fi); |
2203 | if (!del_item) { | 2550 | if (!del_item && !encoding) { |
2204 | u64 orig_num_bytes = | 2551 | u64 orig_num_bytes = |
2205 | btrfs_file_extent_num_bytes(leaf, fi); | 2552 | btrfs_file_extent_num_bytes(leaf, fi); |
2206 | extent_num_bytes = new_size - | 2553 | extent_num_bytes = new_size - |
@@ -2436,7 +2783,14 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) | |||
2436 | last_byte = min(extent_map_end(em), block_end); | 2783 | last_byte = min(extent_map_end(em), block_end); |
2437 | last_byte = (last_byte + mask) & ~mask; | 2784 | last_byte = (last_byte + mask) & ~mask; |
2438 | if (test_bit(EXTENT_FLAG_VACANCY, &em->flags)) { | 2785 | if (test_bit(EXTENT_FLAG_VACANCY, &em->flags)) { |
2786 | u64 hint_byte = 0; | ||
2439 | hole_size = last_byte - cur_offset; | 2787 | hole_size = last_byte - cur_offset; |
2788 | err = btrfs_drop_extents(trans, root, inode, | ||
2789 | cur_offset, | ||
2790 | cur_offset + hole_size, | ||
2791 | cur_offset, &hint_byte); | ||
2792 | if (err) | ||
2793 | break; | ||
2440 | err = btrfs_insert_file_extent(trans, root, | 2794 | err = btrfs_insert_file_extent(trans, root, |
2441 | inode->i_ino, cur_offset, 0, | 2795 | inode->i_ino, cur_offset, 0, |
2442 | 0, hole_size, 0, hole_size, | 2796 | 0, hole_size, 0, hole_size, |
@@ -3785,6 +4139,7 @@ int btrfs_writepages(struct address_space *mapping, | |||
3785 | struct writeback_control *wbc) | 4139 | struct writeback_control *wbc) |
3786 | { | 4140 | { |
3787 | struct extent_io_tree *tree; | 4141 | struct extent_io_tree *tree; |
4142 | |||
3788 | tree = &BTRFS_I(mapping->host)->io_tree; | 4143 | tree = &BTRFS_I(mapping->host)->io_tree; |
3789 | return extent_writepages(tree, mapping, btrfs_get_extent, wbc); | 4144 | return extent_writepages(tree, mapping, btrfs_get_extent, wbc); |
3790 | } | 4145 | } |
@@ -4285,9 +4640,11 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root) | |||
4285 | * ordered extents get created before we return | 4640 | * ordered extents get created before we return |
4286 | */ | 4641 | */ |
4287 | atomic_inc(&root->fs_info->async_submit_draining); | 4642 | atomic_inc(&root->fs_info->async_submit_draining); |
4288 | while(atomic_read(&root->fs_info->nr_async_submits)) { | 4643 | while(atomic_read(&root->fs_info->nr_async_submits) || |
4644 | atomic_read(&root->fs_info->async_delalloc_pages)) { | ||
4289 | wait_event(root->fs_info->async_submit_wait, | 4645 | wait_event(root->fs_info->async_submit_wait, |
4290 | (atomic_read(&root->fs_info->nr_async_submits) == 0)); | 4646 | (atomic_read(&root->fs_info->nr_async_submits) == 0 && |
4647 | atomic_read(&root->fs_info->async_delalloc_pages) == 0)); | ||
4291 | } | 4648 | } |
4292 | atomic_dec(&root->fs_info->async_submit_draining); | 4649 | atomic_dec(&root->fs_info->async_submit_draining); |
4293 | return 0; | 4650 | return 0; |
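
btrfs_start_delalloc_inodes now drains two counters instead of one: it raises async_submit_draining so producers hold off (cow_file_range_async checks the flag before queueing), then waits until both nr_async_submits and async_delalloc_pages fall to zero. A compressed sketch of that handshake using C11 atomics; the names mirror the kernel fields, but the spin-and-yield wait merely stands in for wait_event().

#include <stdatomic.h>
#include <sched.h>

static atomic_int nr_async_submits;
static atomic_int async_delalloc_pages;
static atomic_int async_submit_draining;

/* Producers are expected to pause while async_submit_draining is set. */
static void drain_async_work(void)
{
        atomic_fetch_add(&async_submit_draining, 1);
        while (atomic_load(&nr_async_submits) ||
               atomic_load(&async_delalloc_pages))
                sched_yield();          /* wait_event() in the kernel version */
        atomic_fetch_sub(&async_submit_draining, 1);
}

int main(void)
{
        drain_async_work();             /* both counters idle: returns at once */
        return 0;
}
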
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 370bb4285597..027ad6b3839e 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c | |||
@@ -390,7 +390,7 @@ void btrfs_start_ordered_extent(struct inode *inode, | |||
390 | * start IO on any dirty ones so the wait doesn't stall waiting | 390 | * start IO on any dirty ones so the wait doesn't stall waiting |
391 | * for pdflush to find them | 391 | * for pdflush to find them |
392 | */ | 392 | */ |
393 | btrfs_fdatawrite_range(inode->i_mapping, start, end, WB_SYNC_NONE); | 393 | btrfs_fdatawrite_range(inode->i_mapping, start, end, WB_SYNC_ALL); |
394 | if (wait) { | 394 | if (wait) { |
395 | wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE, | 395 | wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE, |
396 | &entry->flags)); | 396 | &entry->flags)); |
@@ -421,6 +421,12 @@ again: | |||
421 | */ | 421 | */ |
422 | btrfs_fdatawrite_range(inode->i_mapping, start, orig_end, WB_SYNC_NONE); | 422 | btrfs_fdatawrite_range(inode->i_mapping, start, orig_end, WB_SYNC_NONE); |
423 | 423 | ||
424 | /* The compression code will leave pages locked but return from | ||
425 | * writepage without setting the page writeback. Starting again | ||
426 | * with WB_SYNC_ALL will end up waiting for the IO to actually start. | ||
427 | */ | ||
428 | btrfs_fdatawrite_range(inode->i_mapping, start, orig_end, WB_SYNC_ALL); | ||
429 | |||
424 | btrfs_wait_on_page_writeback_range(inode->i_mapping, | 430 | btrfs_wait_on_page_writeback_range(inode->i_mapping, |
425 | start >> PAGE_CACHE_SHIFT, | 431 | start >> PAGE_CACHE_SHIFT, |
426 | orig_end >> PAGE_CACHE_SHIFT); | 432 | orig_end >> PAGE_CACHE_SHIFT); |
@@ -448,10 +454,7 @@ again: | |||
448 | } | 454 | } |
449 | if (test_range_bit(&BTRFS_I(inode)->io_tree, start, orig_end, | 455 | if (test_range_bit(&BTRFS_I(inode)->io_tree, start, orig_end, |
450 | EXTENT_ORDERED | EXTENT_DELALLOC, 0)) { | 456 | EXTENT_ORDERED | EXTENT_DELALLOC, 0)) { |
451 | printk("inode %lu still ordered or delalloc after wait " | 457 | schedule_timeout(1); |
452 | "%llu %llu\n", inode->i_ino, | ||
453 | (unsigned long long)start, | ||
454 | (unsigned long long)orig_end); | ||
455 | goto again; | 458 | goto again; |
456 | } | 459 | } |
457 | return 0; | 460 | return 0; |
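
The extra WB_SYNC_ALL pass added above exists because compressed writeout can return from writepage with pages still locked and not yet marked writeback, so a single non-waiting flush does not guarantee the IO has even started. A loose userspace analogue of the start-then-wait pattern using Linux's sync_file_range(2); it sketches the idea and is not a substitute for the in-kernel helpers.

#define _GNU_SOURCE
#include <fcntl.h>
#include <unistd.h>

/* First pass starts writeback without blocking; second pass waits for it. */
static int flush_range(int fd, off_t start, off_t len)
{
        if (sync_file_range(fd, start, len, SYNC_FILE_RANGE_WRITE))
                return -1;
        return sync_file_range(fd, start, len,
                               SYNC_FILE_RANGE_WAIT_BEFORE |
                               SYNC_FILE_RANGE_WRITE |
                               SYNC_FILE_RANGE_WAIT_AFTER);
}

int main(void)
{
        int fd = open("/tmp/flush-demo", O_CREAT | O_RDWR, 0600);

        if (fd < 0)
                return 1;
        if (write(fd, "dirty data\n", 11) != 11)
                return 1;
        if (flush_range(fd, 0, 11))
                return 1;
        return close(fd);
}
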
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 431fdf144b58..ab9d5e89ed13 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
@@ -375,6 +375,10 @@ int btrfs_sync_fs(struct super_block *sb, int wait) | |||
375 | filemap_flush(root->fs_info->btree_inode->i_mapping); | 375 | filemap_flush(root->fs_info->btree_inode->i_mapping); |
376 | return 0; | 376 | return 0; |
377 | } | 377 | } |
378 | |||
379 | btrfs_start_delalloc_inodes(root); | ||
380 | btrfs_wait_ordered_extents(root, 0); | ||
381 | |||
378 | btrfs_clean_old_snapshots(root); | 382 | btrfs_clean_old_snapshots(root); |
379 | trans = btrfs_start_transaction(root, 1); | 383 | trans = btrfs_start_transaction(root, 1); |
380 | ret = btrfs_commit_transaction(trans, root); | 384 | ret = btrfs_commit_transaction(trans, root); |
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c index e99309180a11..ba2527d08734 100644 --- a/fs/btrfs/zlib.c +++ b/fs/btrfs/zlib.c | |||
@@ -423,8 +423,9 @@ int btrfs_zlib_decompress_biovec(struct page **pages_in, | |||
423 | /* we didn't make progress in this inflate | 423 | /* we didn't make progress in this inflate |
424 | * call, we're done | 424 | * call, we're done |
425 | */ | 425 | */ |
426 | if (ret != Z_STREAM_END) | 426 | if (ret != Z_STREAM_END) { |
427 | ret = -1; | 427 | ret = -1; |
428 | } | ||
428 | break; | 429 | break; |
429 | } | 430 | } |
430 | 431 | ||
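
The brace fix above sits in btrfs's inflate loop, whose real job is the progress check: an inflate() call that neither reaches Z_STREAM_END nor produces output means the stream is stuck and must be abandoned rather than retried. A self-contained flat-buffer sketch of that loop against stock zlib, with a compress()/decompress round trip standing in for the compressed bio pages the real code walks.

#include <zlib.h>
#include <stdio.h>
#include <string.h>

/* Inflate a whole zlib stream, bailing out if a call makes no progress. */
static int decompress(const unsigned char *in, size_t in_len,
                      unsigned char *out, size_t out_len)
{
        z_stream s;
        int ret;

        memset(&s, 0, sizeof(s));
        if (inflateInit(&s) != Z_OK)
                return -1;
        s.next_in = (unsigned char *)in;
        s.avail_in = in_len;
        s.next_out = out;
        s.avail_out = out_len;

        while (1) {
                unsigned long before = s.total_out;

                ret = inflate(&s, Z_NO_FLUSH);
                if (ret == Z_STREAM_END)
                        break;
                if (ret != Z_OK || s.total_out == before) {
                        /* no progress and no end-of-stream: give up */
                        ret = -1;
                        break;
                }
        }
        inflateEnd(&s);
        return ret == Z_STREAM_END ? 0 : -1;
}

int main(void)
{
        const char *msg = "btrfs compressed extent";
        unsigned char comp[128], plain[128];
        uLongf clen = sizeof(comp);

        if (compress(comp, &clen, (const Bytef *)msg, strlen(msg) + 1) != Z_OK)
                return 1;
        if (decompress(comp, clen, plain, sizeof(plain)))
                return 1;
        printf("%s\n", plain);
        return 0;
}
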