Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r--  fs/btrfs/inode.c  643
1 file changed, 500 insertions(+), 143 deletions(-)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 3df0ffad976e..e01c0d0310ab 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -86,6 +86,10 @@ static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
 
 static void btrfs_truncate(struct inode *inode);
 static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end);
+static noinline int cow_file_range(struct inode *inode,
+				   struct page *locked_page,
+				   u64 start, u64 end, int *page_started,
+				   unsigned long *nr_written, int unlock);
 
 /*
  * a very lame attempt at stopping writes when the FS is 85% full. There
@@ -262,35 +266,72 @@ static int cow_file_range_inline(struct btrfs_trans_handle *trans,
262 return 0; 266 return 0;
263} 267}
264 268
269struct async_extent {
270 u64 start;
271 u64 ram_size;
272 u64 compressed_size;
273 struct page **pages;
274 unsigned long nr_pages;
275 struct list_head list;
276};
277
278struct async_cow {
279 struct inode *inode;
280 struct btrfs_root *root;
281 struct page *locked_page;
282 u64 start;
283 u64 end;
284 struct list_head extents;
285 struct btrfs_work work;
286};
287
288static noinline int add_async_extent(struct async_cow *cow,
289 u64 start, u64 ram_size,
290 u64 compressed_size,
291 struct page **pages,
292 unsigned long nr_pages)
293{
294 struct async_extent *async_extent;
295
296 async_extent = kmalloc(sizeof(*async_extent), GFP_NOFS);
297 async_extent->start = start;
298 async_extent->ram_size = ram_size;
299 async_extent->compressed_size = compressed_size;
300 async_extent->pages = pages;
301 async_extent->nr_pages = nr_pages;
302 list_add_tail(&async_extent->list, &cow->extents);
303 return 0;
304}
305
265/* 306/*
266 * when extent_io.c finds a delayed allocation range in the file, 307 * we create compressed extents in two phases. The first
267 * the call backs end up in this code. The basic idea is to 308 * phase compresses a range of pages that have already been
268 * allocate extents on disk for the range, and create ordered data structs 309 * locked (both pages and state bits are locked).
269 * in ram to track those extents.
270 * 310 *
271 * locked_page is the page that writepage had locked already. We use 311 * This is done inside an ordered work queue, and the compression
272 * it to make sure we don't do extra locks or unlocks. 312 * is spread across many cpus. The actual IO submission is step
313 * two, and the ordered work queue takes care of making sure that
314 * happens in the same order things were put onto the queue by
315 * writepages and friends.
273 * 316 *
274 * *page_started is set to one if we unlock locked_page and do everything 317 * If this code finds it can't get good compression, it puts an
275 * required to start IO on it. It may be clean and already done with 318 * entry onto the work queue to write the uncompressed bytes. This
276 * IO when we return. 319 * makes sure that both compressed inodes and uncompressed inodes
320 * are written in the same order that pdflush sent them down.
277 */ 321 */
278static int cow_file_range(struct inode *inode, struct page *locked_page, 322static noinline int compress_file_range(struct inode *inode,
279 u64 start, u64 end, int *page_started) 323 struct page *locked_page,
324 u64 start, u64 end,
325 struct async_cow *async_cow,
326 int *num_added)
280{ 327{
281 struct btrfs_root *root = BTRFS_I(inode)->root; 328 struct btrfs_root *root = BTRFS_I(inode)->root;
282 struct btrfs_trans_handle *trans; 329 struct btrfs_trans_handle *trans;
283 u64 alloc_hint = 0;
284 u64 num_bytes; 330 u64 num_bytes;
285 unsigned long ram_size;
286 u64 orig_start; 331 u64 orig_start;
287 u64 disk_num_bytes; 332 u64 disk_num_bytes;
288 u64 cur_alloc_size;
289 u64 blocksize = root->sectorsize; 333 u64 blocksize = root->sectorsize;
290 u64 actual_end; 334 u64 actual_end;
291 struct btrfs_key ins;
292 struct extent_map *em;
293 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
294 int ret = 0; 335 int ret = 0;
295 struct page **pages = NULL; 336 struct page **pages = NULL;
296 unsigned long nr_pages; 337 unsigned long nr_pages;
@@ -298,22 +339,12 @@ static int cow_file_range(struct inode *inode, struct page *locked_page,
298 unsigned long total_compressed = 0; 339 unsigned long total_compressed = 0;
299 unsigned long total_in = 0; 340 unsigned long total_in = 0;
300 unsigned long max_compressed = 128 * 1024; 341 unsigned long max_compressed = 128 * 1024;
301 unsigned long max_uncompressed = 256 * 1024; 342 unsigned long max_uncompressed = 128 * 1024;
302 int i; 343 int i;
303 int ordered_type;
304 int will_compress; 344 int will_compress;
305 345
306 trans = btrfs_join_transaction(root, 1);
307 BUG_ON(!trans);
308 btrfs_set_trans_block_group(trans, inode);
309 orig_start = start; 346 orig_start = start;
310 347
311 /*
312 * compression made this loop a bit ugly, but the basic idea is to
313 * compress some pages but keep the total size of the compressed
314 * extent relatively small. If compression is off, this goto target
315 * is never used.
316 */
317again: 348again:
318 will_compress = 0; 349 will_compress = 0;
319 nr_pages = (end >> PAGE_CACHE_SHIFT) - (start >> PAGE_CACHE_SHIFT) + 1; 350 nr_pages = (end >> PAGE_CACHE_SHIFT) - (start >> PAGE_CACHE_SHIFT) + 1;
@@ -324,7 +355,13 @@ again:
324 355
325 /* we want to make sure that amount of ram required to uncompress 356 /* we want to make sure that amount of ram required to uncompress
326 * an extent is reasonable, so we limit the total size in ram 357 * an extent is reasonable, so we limit the total size in ram
327 * of a compressed extent to 256k 358 * of a compressed extent to 128k. This is a crucial number
359 * because it also controls how easily we can spread reads across
360 * cpus for decompression.
361 *
362 * We also want to make sure the amount of IO required to do
363 * a random read is reasonably small, so we limit the size of
364 * a compressed extent to 128k.
328 */ 365 */
329 total_compressed = min(total_compressed, max_uncompressed); 366 total_compressed = min(total_compressed, max_uncompressed);
330 num_bytes = (end - start + blocksize) & ~(blocksize - 1); 367 num_bytes = (end - start + blocksize) & ~(blocksize - 1);
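
The rounding expression above, (end - start + blocksize) & ~(blocksize - 1), relies on end being an inclusive byte offset, so end - start + 1 is the range length and the add-then-mask rounds it up to a whole number of sectors. The following standalone C check of that identity is illustrative only (the 4096-byte block size is an assumption, not btrfs code):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* round an inclusive [start, end] byte range up to whole blocks, the same
 * way cow_file_range derives num_bytes from start/end */
static uint64_t range_to_blocks(uint64_t start, uint64_t end, uint64_t blocksize)
{
	/* end is inclusive, so length is end - start + 1; adding
	 * blocksize - 1 and masking rounds up to a blocksize multiple */
	return (end - start + blocksize) & ~(blocksize - 1);
}

int main(void)
{
	uint64_t bs = 4096;	/* example sectorsize */

	assert(range_to_blocks(0, 0, bs) == bs);	/* 1 byte -> 1 block */
	assert(range_to_blocks(0, 4095, bs) == bs);	/* exactly one block */
	assert(range_to_blocks(0, 4096, bs) == 2 * bs);	/* 1 byte over -> 2 */
	printf("round-up identity holds for the sample ranges\n");
	return 0;
}
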
@@ -333,18 +370,16 @@ again:
333 total_in = 0; 370 total_in = 0;
334 ret = 0; 371 ret = 0;
335 372
336 /* we do compression for mount -o compress and when the 373 /*
337 * inode has not been flagged as nocompress 374 * we do compression for mount -o compress and when the
375 * inode has not been flagged as nocompress. This flag can
376 * change at any time if we discover bad compression ratios.
338 */ 377 */
339 if (!btrfs_test_flag(inode, NOCOMPRESS) && 378 if (!btrfs_test_flag(inode, NOCOMPRESS) &&
340 btrfs_test_opt(root, COMPRESS)) { 379 btrfs_test_opt(root, COMPRESS)) {
341 WARN_ON(pages); 380 WARN_ON(pages);
342 pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS); 381 pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS);
343 382
344 /* we want to make sure the amount of IO required to satisfy
345 * a random read is reasonably small, so we limit the size
346 * of a compressed extent to 128k
347 */
348 ret = btrfs_zlib_compress_pages(inode->i_mapping, start, 383 ret = btrfs_zlib_compress_pages(inode->i_mapping, start,
349 total_compressed, pages, 384 total_compressed, pages,
350 nr_pages, &nr_pages_ret, 385 nr_pages, &nr_pages_ret,
@@ -371,26 +406,34 @@ again:
371 } 406 }
372 } 407 }
373 if (start == 0) { 408 if (start == 0) {
409 trans = btrfs_join_transaction(root, 1);
410 BUG_ON(!trans);
411 btrfs_set_trans_block_group(trans, inode);
412
374 /* lets try to make an inline extent */ 413 /* lets try to make an inline extent */
375 if (ret || total_in < (end - start + 1)) { 414 if (ret || total_in < (actual_end - start)) {
376 /* we didn't compress the entire range, try 415 /* we didn't compress the entire range, try
377 * to make an uncompressed inline extent. This 416 * to make an uncompressed inline extent.
378 * is almost sure to fail, but maybe inline sizes
379 * will get bigger later
380 */ 417 */
381 ret = cow_file_range_inline(trans, root, inode, 418 ret = cow_file_range_inline(trans, root, inode,
382 start, end, 0, NULL); 419 start, end, 0, NULL);
383 } else { 420 } else {
421 /* try making a compressed inline extent */
384 ret = cow_file_range_inline(trans, root, inode, 422 ret = cow_file_range_inline(trans, root, inode,
385 start, end, 423 start, end,
386 total_compressed, pages); 424 total_compressed, pages);
387 } 425 }
426 btrfs_end_transaction(trans, root);
388 if (ret == 0) { 427 if (ret == 0) {
428 /*
429 * inline extent creation worked, we don't need
430 * to create any more async work items. Unlock
431 * and free up our temp pages.
432 */
389 extent_clear_unlock_delalloc(inode, 433 extent_clear_unlock_delalloc(inode,
390 &BTRFS_I(inode)->io_tree, 434 &BTRFS_I(inode)->io_tree,
391 start, end, NULL, 435 start, end, NULL, 1, 0,
392 1, 1, 1); 436 0, 1, 1, 1);
393 *page_started = 1;
394 ret = 0; 437 ret = 0;
395 goto free_pages_out; 438 goto free_pages_out;
396 } 439 }
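
compress_file_range does not write anything itself: each compressed chunk (or uncompressed fall-back range) is recorded as an async_extent on async_cow->extents, and submit_compressed_extents later walks that list in order. A minimal userspace sketch of the producer side follows; the names mirror the patch but the hand-rolled singly linked list and all constants are illustrative, not the kernel list_head API:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* one chunk queued by the compression phase; compressed_size == 0 and
 * pages == NULL mean "write this range uncompressed" */
struct async_extent {
	uint64_t start;
	uint64_t ram_size;
	uint64_t compressed_size;
	void **pages;
	unsigned long nr_pages;
	struct async_extent *next;
};

/* per-range work item; extents are consumed in FIFO order by phase two */
struct async_cow {
	uint64_t start;
	uint64_t end;
	struct async_extent *head, *tail;
};

static int add_async_extent(struct async_cow *cow, uint64_t start,
			    uint64_t ram_size, uint64_t compressed_size,
			    void **pages, unsigned long nr_pages)
{
	struct async_extent *ae = malloc(sizeof(*ae));

	if (!ae)
		return -1;
	ae->start = start;
	ae->ram_size = ram_size;
	ae->compressed_size = compressed_size;
	ae->pages = pages;
	ae->nr_pages = nr_pages;
	ae->next = NULL;
	if (cow->tail)
		cow->tail->next = ae;
	else
		cow->head = ae;
	cow->tail = ae;
	return 0;
}

int main(void)
{
	struct async_cow cow = { .start = 0, .end = (512 << 10) - 1 };

	/* 128K of file data that compressed down to 32K */
	add_async_extent(&cow, 0, 128 << 10, 32 << 10, NULL, 8);
	/* next 128K did not compress; queue it for plain cow_file_range */
	add_async_extent(&cow, 128 << 10, 128 << 10, 0, NULL, 0);

	for (struct async_extent *ae = cow.head; ae; ae = ae->next)
		printf("extent at %llu, ram %llu, compressed %llu\n",
		       (unsigned long long)ae->start,
		       (unsigned long long)ae->ram_size,
		       (unsigned long long)ae->compressed_size);
	return 0;
}
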
@@ -435,53 +478,280 @@ again:
435 /* flag the file so we don't compress in the future */ 478 /* flag the file so we don't compress in the future */
436 btrfs_set_flag(inode, NOCOMPRESS); 479 btrfs_set_flag(inode, NOCOMPRESS);
437 } 480 }
481 if (will_compress) {
482 *num_added += 1;
438 483
439 BUG_ON(disk_num_bytes > 484 /* the async work queues will take care of doing actual
440 btrfs_super_total_bytes(&root->fs_info->super_copy)); 485 * allocation on disk for these compressed pages,
486 * and will submit them to the elevator.
487 */
488 add_async_extent(async_cow, start, num_bytes,
489 total_compressed, pages, nr_pages_ret);
441 490
442 btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0); 491 if (start + num_bytes < end) {
492 start += num_bytes;
493 pages = NULL;
494 cond_resched();
495 goto again;
496 }
497 } else {
498 /*
499 * No compression, but we still need to write the pages in
500 * the file we've been given so far. redirty the locked
501 * page if it corresponds to our extent and set things up
502 * for the async work queue to run cow_file_range to do
503 * the normal delalloc dance
504 */
505 if (page_offset(locked_page) >= start &&
506 page_offset(locked_page) <= end) {
507 __set_page_dirty_nobuffers(locked_page);
508 /* unlocked later on in the async handlers */
509 }
510 add_async_extent(async_cow, start, end - start + 1, 0, NULL, 0);
511 *num_added += 1;
512 }
443 513
444 while(disk_num_bytes > 0) { 514out:
445 unsigned long min_bytes; 515 return 0;
516
517free_pages_out:
518 for (i = 0; i < nr_pages_ret; i++) {
519 WARN_ON(pages[i]->mapping);
520 page_cache_release(pages[i]);
521 }
522 if (pages)
523 kfree(pages);
524
525 goto out;
526}
527
528/*
529 * phase two of compressed writeback. This is the ordered portion
530 * of the code, which only gets called in the order the work was
531 * queued. We walk all the async extents created by compress_file_range
532 * and send them down to the disk.
533 */
534static noinline int submit_compressed_extents(struct inode *inode,
535 struct async_cow *async_cow)
536{
537 struct async_extent *async_extent;
538 u64 alloc_hint = 0;
539 struct btrfs_trans_handle *trans;
540 struct btrfs_key ins;
541 struct extent_map *em;
542 struct btrfs_root *root = BTRFS_I(inode)->root;
543 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
544 struct extent_io_tree *io_tree;
545 int ret;
546
547 if (list_empty(&async_cow->extents))
548 return 0;
549
550 trans = btrfs_join_transaction(root, 1);
551
552 while(!list_empty(&async_cow->extents)) {
553 async_extent = list_entry(async_cow->extents.next,
554 struct async_extent, list);
555 list_del(&async_extent->list);
446 556
557 io_tree = &BTRFS_I(inode)->io_tree;
558
559 /* did the compression code fall back to uncompressed IO? */
560 if (!async_extent->pages) {
561 int page_started = 0;
562 unsigned long nr_written = 0;
563
564 lock_extent(io_tree, async_extent->start,
565 async_extent->start + async_extent->ram_size - 1,
566 GFP_NOFS);
567
568 /* allocate blocks */
569 cow_file_range(inode, async_cow->locked_page,
570 async_extent->start,
571 async_extent->start +
572 async_extent->ram_size - 1,
573 &page_started, &nr_written, 0);
574
575 /*
576 * if page_started, cow_file_range inserted an
577 * inline extent and took care of all the unlocking
578 * and IO for us. Otherwise, we need to submit
579 * all those pages down to the drive.
580 */
581 if (!page_started)
582 extent_write_locked_range(io_tree,
583 inode, async_extent->start,
584 async_extent->start +
585 async_extent->ram_size - 1,
586 btrfs_get_extent,
587 WB_SYNC_ALL);
588 kfree(async_extent);
589 cond_resched();
590 continue;
591 }
592
593 lock_extent(io_tree, async_extent->start,
594 async_extent->start + async_extent->ram_size - 1,
595 GFP_NOFS);
447 /* 596 /*
448 * the max size of a compressed extent is pretty small, 597 * here we're doing allocation and writeback of the
449 * make the code a little less complex by forcing 598 * compressed pages
450 * the allocator to find a whole compressed extent at once
451 */ 599 */
452 if (will_compress) 600 btrfs_drop_extent_cache(inode, async_extent->start,
453 min_bytes = disk_num_bytes; 601 async_extent->start +
454 else 602 async_extent->ram_size - 1, 0);
455 min_bytes = root->sectorsize; 603
604 ret = btrfs_reserve_extent(trans, root,
605 async_extent->compressed_size,
606 async_extent->compressed_size,
607 0, alloc_hint,
608 (u64)-1, &ins, 1);
609 BUG_ON(ret);
610 em = alloc_extent_map(GFP_NOFS);
611 em->start = async_extent->start;
612 em->len = async_extent->ram_size;
613
614 em->block_start = ins.objectid;
615 em->block_len = ins.offset;
616 em->bdev = root->fs_info->fs_devices->latest_bdev;
617 set_bit(EXTENT_FLAG_PINNED, &em->flags);
618 set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
619
620 while(1) {
621 spin_lock(&em_tree->lock);
622 ret = add_extent_mapping(em_tree, em);
623 spin_unlock(&em_tree->lock);
624 if (ret != -EEXIST) {
625 free_extent_map(em);
626 break;
627 }
628 btrfs_drop_extent_cache(inode, async_extent->start,
629 async_extent->start +
630 async_extent->ram_size - 1, 0);
631 }
632
633 ret = btrfs_add_ordered_extent(inode, async_extent->start,
634 ins.objectid,
635 async_extent->ram_size,
636 ins.offset,
637 BTRFS_ORDERED_COMPRESSED);
638 BUG_ON(ret);
639
640 btrfs_end_transaction(trans, root);
641
642 /*
643 * clear dirty, set writeback and unlock the pages.
644 */
645 extent_clear_unlock_delalloc(inode,
646 &BTRFS_I(inode)->io_tree,
647 async_extent->start,
648 async_extent->start +
649 async_extent->ram_size - 1,
650 NULL, 1, 1, 0, 1, 1, 0);
651
652 ret = btrfs_submit_compressed_write(inode,
653 async_extent->start,
654 async_extent->ram_size,
655 ins.objectid,
656 ins.offset, async_extent->pages,
657 async_extent->nr_pages);
658
659 BUG_ON(ret);
660 trans = btrfs_join_transaction(root, 1);
661 alloc_hint = ins.objectid + ins.offset;
662 kfree(async_extent);
663 cond_resched();
664 }
665
666 btrfs_end_transaction(trans, root);
667 return 0;
668}
669
670/*
671 * when extent_io.c finds a delayed allocation range in the file,
672 * the call backs end up in this code. The basic idea is to
673 * allocate extents on disk for the range, and create ordered data structs
674 * in ram to track those extents.
675 *
676 * locked_page is the page that writepage had locked already. We use
677 * it to make sure we don't do extra locks or unlocks.
678 *
679 * *page_started is set to one if we unlock locked_page and do everything
680 * required to start IO on it. It may be clean and already done with
681 * IO when we return.
682 */
683static noinline int cow_file_range(struct inode *inode,
684 struct page *locked_page,
685 u64 start, u64 end, int *page_started,
686 unsigned long *nr_written,
687 int unlock)
688{
689 struct btrfs_root *root = BTRFS_I(inode)->root;
690 struct btrfs_trans_handle *trans;
691 u64 alloc_hint = 0;
692 u64 num_bytes;
693 unsigned long ram_size;
694 u64 disk_num_bytes;
695 u64 cur_alloc_size;
696 u64 blocksize = root->sectorsize;
697 u64 actual_end;
698 struct btrfs_key ins;
699 struct extent_map *em;
700 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
701 int ret = 0;
702
703 trans = btrfs_join_transaction(root, 1);
704 BUG_ON(!trans);
705 btrfs_set_trans_block_group(trans, inode);
456 706
707 actual_end = min_t(u64, i_size_read(inode), end + 1);
708
709 num_bytes = (end - start + blocksize) & ~(blocksize - 1);
710 num_bytes = max(blocksize, num_bytes);
711 disk_num_bytes = num_bytes;
712 ret = 0;
713
714 if (start == 0) {
715 /* lets try to make an inline extent */
716 ret = cow_file_range_inline(trans, root, inode,
717 start, end, 0, NULL);
718 if (ret == 0) {
719 extent_clear_unlock_delalloc(inode,
720 &BTRFS_I(inode)->io_tree,
721 start, end, NULL, 1, 1,
722 1, 1, 1, 1);
723 *nr_written = *nr_written +
724 (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE;
725 *page_started = 1;
726 ret = 0;
727 goto out;
728 }
729 }
730
731 BUG_ON(disk_num_bytes >
732 btrfs_super_total_bytes(&root->fs_info->super_copy));
733
734 btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0);
735
736 while(disk_num_bytes > 0) {
457 cur_alloc_size = min(disk_num_bytes, root->fs_info->max_extent); 737 cur_alloc_size = min(disk_num_bytes, root->fs_info->max_extent);
458 ret = btrfs_reserve_extent(trans, root, cur_alloc_size, 738 ret = btrfs_reserve_extent(trans, root, cur_alloc_size,
459 min_bytes, 0, alloc_hint, 739 root->sectorsize, 0, alloc_hint,
460 (u64)-1, &ins, 1); 740 (u64)-1, &ins, 1);
461 if (ret) { 741 if (ret) {
462 WARN_ON(1); 742 BUG();
463 goto free_pages_out_fail;
464 } 743 }
465 em = alloc_extent_map(GFP_NOFS); 744 em = alloc_extent_map(GFP_NOFS);
466 em->start = start; 745 em->start = start;
467 746
468 if (will_compress) { 747 ram_size = ins.offset;
469 ram_size = num_bytes; 748 em->len = ins.offset;
470 em->len = num_bytes;
471 } else {
472 /* ramsize == disk size */
473 ram_size = ins.offset;
474 em->len = ins.offset;
475 }
476 749
477 em->block_start = ins.objectid; 750 em->block_start = ins.objectid;
478 em->block_len = ins.offset; 751 em->block_len = ins.offset;
479 em->bdev = root->fs_info->fs_devices->latest_bdev; 752 em->bdev = root->fs_info->fs_devices->latest_bdev;
480 set_bit(EXTENT_FLAG_PINNED, &em->flags); 753 set_bit(EXTENT_FLAG_PINNED, &em->flags);
481 754
482 if (will_compress)
483 set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
484
485 while(1) { 755 while(1) {
486 spin_lock(&em_tree->lock); 756 spin_lock(&em_tree->lock);
487 ret = add_extent_mapping(em_tree, em); 757 ret = add_extent_mapping(em_tree, em);
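
Both submit_compressed_extents and the rewritten cow_file_range use the same idiom when recording the new mapping: call add_extent_mapping, and on -EEXIST drop the cached extent range and retry until the stale mapping is gone. Below is a compact userspace model of that retry loop, with a single-slot "cache" standing in for the extent map tree; everything here is an illustrative sketch, not the btrfs API:

#include <stdio.h>

#define EEXIST 17

/* toy stand-in for the extent map tree: a single cached range */
struct em_tree {
	int occupied;
	unsigned long long start, len;
};

static int add_extent_mapping(struct em_tree *t, unsigned long long start,
			      unsigned long long len)
{
	if (t->occupied)
		return -EEXIST;	/* an old mapping overlaps, caller must drop it */
	t->occupied = 1;
	t->start = start;
	t->len = len;
	return 0;
}

static void drop_extent_cache(struct em_tree *t)
{
	t->occupied = 0;	/* throw away the stale cached mapping */
}

int main(void)
{
	struct em_tree tree = { .occupied = 1 };	/* stale entry present */
	unsigned long long start = 0, len = 128 << 10;

	/* same shape as the while(1) loop in the patch: insert, and on
	 * -EEXIST drop the cached range and try again */
	while (1) {
		int ret = add_extent_mapping(&tree, start, len);

		if (ret != -EEXIST)
			break;
		drop_extent_cache(&tree);
	}
	printf("mapping recorded at %llu len %llu\n", tree.start, tree.len);
	return 0;
}
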
@@ -495,10 +765,8 @@ again:
495 } 765 }
496 766
497 cur_alloc_size = ins.offset; 767 cur_alloc_size = ins.offset;
498 ordered_type = will_compress ? BTRFS_ORDERED_COMPRESSED : 0;
499 ret = btrfs_add_ordered_extent(inode, start, ins.objectid, 768 ret = btrfs_add_ordered_extent(inode, start, ins.objectid,
500 ram_size, cur_alloc_size, 769 ram_size, cur_alloc_size, 0);
501 ordered_type);
502 BUG_ON(ret); 770 BUG_ON(ret);
503 771
504 if (disk_num_bytes < cur_alloc_size) { 772 if (disk_num_bytes < cur_alloc_size) {
@@ -506,82 +774,145 @@ again:
506 cur_alloc_size); 774 cur_alloc_size);
507 break; 775 break;
508 } 776 }
509
510 if (will_compress) {
511 /*
512 * we're doing compression, we and we need to
513 * submit the compressed extents down to the device.
514 *
515 * We lock down all the file pages, clearing their
516 * dirty bits and setting them writeback. Everyone
517 * that wants to modify the page will wait on the
518 * ordered extent above.
519 *
520 * The writeback bits on the file pages are
521 * cleared when the compressed pages are on disk
522 */
523 btrfs_end_transaction(trans, root);
524
525 if (start <= page_offset(locked_page) &&
526 page_offset(locked_page) < start + ram_size) {
527 *page_started = 1;
528 }
529
530 extent_clear_unlock_delalloc(inode,
531 &BTRFS_I(inode)->io_tree,
532 start,
533 start + ram_size - 1,
534 NULL, 1, 1, 0);
535
536 ret = btrfs_submit_compressed_write(inode, start,
537 ram_size, ins.objectid,
538 cur_alloc_size, pages,
539 nr_pages_ret);
540
541 BUG_ON(ret);
542 trans = btrfs_join_transaction(root, 1);
543 if (start + ram_size < end) {
544 start += ram_size;
545 alloc_hint = ins.objectid + ins.offset;
546 /* pages will be freed at end_bio time */
547 pages = NULL;
548 goto again;
549 } else {
550 /* we've written everything, time to go */
551 break;
552 }
553 }
554 /* we're not doing compressed IO, don't unlock the first 777 /* we're not doing compressed IO, don't unlock the first
555 * page (which the caller expects to stay locked), don't 778 * page (which the caller expects to stay locked), don't
556 * clear any dirty bits and don't set any writeback bits 779 * clear any dirty bits and don't set any writeback bits
557 */ 780 */
558 extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, 781 extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
559 start, start + ram_size - 1, 782 start, start + ram_size - 1,
560 locked_page, 0, 0, 0); 783 locked_page, unlock, 1,
784 1, 0, 0, 0);
561 disk_num_bytes -= cur_alloc_size; 785 disk_num_bytes -= cur_alloc_size;
562 num_bytes -= cur_alloc_size; 786 num_bytes -= cur_alloc_size;
563 alloc_hint = ins.objectid + ins.offset; 787 alloc_hint = ins.objectid + ins.offset;
564 start += cur_alloc_size; 788 start += cur_alloc_size;
565 } 789 }
566
567 ret = 0;
568out: 790out:
791 ret = 0;
569 btrfs_end_transaction(trans, root); 792 btrfs_end_transaction(trans, root);
570 793
571 return ret; 794 return ret;
795}
572 796
573free_pages_out_fail: 797/*
574 extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, 798 * work queue call back to started compression on a file and pages
575 start, end, locked_page, 0, 0, 0); 799 */
576free_pages_out: 800static noinline void async_cow_start(struct btrfs_work *work)
577 for (i = 0; i < nr_pages_ret; i++) { 801{
578 WARN_ON(pages[i]->mapping); 802 struct async_cow *async_cow;
579 page_cache_release(pages[i]); 803 int num_added = 0;
804 async_cow = container_of(work, struct async_cow, work);
805
806 compress_file_range(async_cow->inode, async_cow->locked_page,
807 async_cow->start, async_cow->end, async_cow,
808 &num_added);
809 if (num_added == 0)
810 async_cow->inode = NULL;
811}
812
813/*
814 * work queue call back to submit previously compressed pages
815 */
816static noinline void async_cow_submit(struct btrfs_work *work)
817{
818 struct async_cow *async_cow;
819 struct btrfs_root *root;
820 unsigned long nr_pages;
821
822 async_cow = container_of(work, struct async_cow, work);
823
824 root = async_cow->root;
825 nr_pages = (async_cow->end - async_cow->start + PAGE_CACHE_SIZE) >>
826 PAGE_CACHE_SHIFT;
827
828 atomic_sub(nr_pages, &root->fs_info->async_delalloc_pages);
829
830 if (atomic_read(&root->fs_info->async_delalloc_pages) <
831 5 * 1024 * 1024 &&
832 waitqueue_active(&root->fs_info->async_submit_wait))
833 wake_up(&root->fs_info->async_submit_wait);
834
835 if (async_cow->inode) {
836 submit_compressed_extents(async_cow->inode, async_cow);
580 } 837 }
581 if (pages) 838}
582 kfree(pages);
583 839
584 goto out; 840static noinline void async_cow_free(struct btrfs_work *work)
841{
842 struct async_cow *async_cow;
843 async_cow = container_of(work, struct async_cow, work);
844 kfree(async_cow);
845}
846
847static int cow_file_range_async(struct inode *inode, struct page *locked_page,
848 u64 start, u64 end, int *page_started,
849 unsigned long *nr_written)
850{
851 struct async_cow *async_cow;
852 struct btrfs_root *root = BTRFS_I(inode)->root;
853 unsigned long nr_pages;
854 u64 cur_end;
855 int limit = 10 * 1024 * 1024;
856
857 if (!btrfs_test_opt(root, COMPRESS)) {
858 return cow_file_range(inode, locked_page, start, end,
859 page_started, nr_written, 1);
860 }
861
862 clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED |
863 EXTENT_DELALLOC, 1, 0, GFP_NOFS);
864 while(start < end) {
865 async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS);
866 async_cow->inode = inode;
867 async_cow->root = root;
868 async_cow->locked_page = locked_page;
869 async_cow->start = start;
870
871 if (btrfs_test_flag(inode, NOCOMPRESS))
872 cur_end = end;
873 else
874 cur_end = min(end, start + 512 * 1024 - 1);
875
876 async_cow->end = cur_end;
877 INIT_LIST_HEAD(&async_cow->extents);
878
879 async_cow->work.func = async_cow_start;
880 async_cow->work.ordered_func = async_cow_submit;
881 async_cow->work.ordered_free = async_cow_free;
882 async_cow->work.flags = 0;
883
884 while(atomic_read(&root->fs_info->async_submit_draining) &&
885 atomic_read(&root->fs_info->async_delalloc_pages)) {
886 wait_event(root->fs_info->async_submit_wait,
887 (atomic_read(&root->fs_info->async_delalloc_pages)
888 == 0));
889 }
890
891 nr_pages = (cur_end - start + PAGE_CACHE_SIZE) >>
892 PAGE_CACHE_SHIFT;
893 atomic_add(nr_pages, &root->fs_info->async_delalloc_pages);
894
895 btrfs_queue_worker(&root->fs_info->delalloc_workers,
896 &async_cow->work);
897
898 if (atomic_read(&root->fs_info->async_delalloc_pages) > limit) {
899 wait_event(root->fs_info->async_submit_wait,
900 (atomic_read(&root->fs_info->async_delalloc_pages) <
901 limit));
902 }
903
904 while(atomic_read(&root->fs_info->async_submit_draining) &&
905 atomic_read(&root->fs_info->async_delalloc_pages)) {
906 wait_event(root->fs_info->async_submit_wait,
907 (atomic_read(&root->fs_info->async_delalloc_pages) ==
908 0));
909 }
910
911 *nr_written += nr_pages;
912 start = cur_end + 1;
913 }
914 *page_started = 1;
915 return 0;
585} 916}
586 917
587/* 918/*
@@ -592,7 +923,8 @@ free_pages_out:
592 * blocks on disk 923 * blocks on disk
593 */ 924 */
594static int run_delalloc_nocow(struct inode *inode, struct page *locked_page, 925static int run_delalloc_nocow(struct inode *inode, struct page *locked_page,
595 u64 start, u64 end, int *page_started, int force) 926 u64 start, u64 end, int *page_started, int force,
927 unsigned long *nr_written)
596{ 928{
597 struct btrfs_root *root = BTRFS_I(inode)->root; 929 struct btrfs_root *root = BTRFS_I(inode)->root;
598 struct btrfs_trans_handle *trans; 930 struct btrfs_trans_handle *trans;
@@ -711,7 +1043,8 @@ out_check:
711 btrfs_release_path(root, path); 1043 btrfs_release_path(root, path);
712 if (cow_start != (u64)-1) { 1044 if (cow_start != (u64)-1) {
713 ret = cow_file_range(inode, locked_page, cow_start, 1045 ret = cow_file_range(inode, locked_page, cow_start,
714 found_key.offset - 1, page_started); 1046 found_key.offset - 1, page_started,
1047 nr_written, 1);
715 BUG_ON(ret); 1048 BUG_ON(ret);
716 cow_start = (u64)-1; 1049 cow_start = (u64)-1;
717 } 1050 }
@@ -748,9 +1081,10 @@ out_check:
748 ret = btrfs_add_ordered_extent(inode, cur_offset, disk_bytenr, 1081 ret = btrfs_add_ordered_extent(inode, cur_offset, disk_bytenr,
749 num_bytes, num_bytes, type); 1082 num_bytes, num_bytes, type);
750 BUG_ON(ret); 1083 BUG_ON(ret);
1084
751 extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, 1085 extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
752 cur_offset, cur_offset + num_bytes - 1, 1086 cur_offset, cur_offset + num_bytes - 1,
753 locked_page, 0, 0, 0); 1087 locked_page, 1, 1, 1, 0, 0, 0);
754 cur_offset = extent_end; 1088 cur_offset = extent_end;
755 if (cur_offset > end) 1089 if (cur_offset > end)
756 break; 1090 break;
@@ -761,7 +1095,7 @@ out_check:
761 cow_start = cur_offset; 1095 cow_start = cur_offset;
762 if (cow_start != (u64)-1) { 1096 if (cow_start != (u64)-1) {
763 ret = cow_file_range(inode, locked_page, cow_start, end, 1097 ret = cow_file_range(inode, locked_page, cow_start, end,
764 page_started); 1098 page_started, nr_written, 1);
765 BUG_ON(ret); 1099 BUG_ON(ret);
766 } 1100 }
767 1101
@@ -775,7 +1109,8 @@ out_check:
775 * extent_io.c call back to do delayed allocation processing 1109 * extent_io.c call back to do delayed allocation processing
776 */ 1110 */
777static int run_delalloc_range(struct inode *inode, struct page *locked_page, 1111static int run_delalloc_range(struct inode *inode, struct page *locked_page,
778 u64 start, u64 end, int *page_started) 1112 u64 start, u64 end, int *page_started,
1113 unsigned long *nr_written)
779{ 1114{
780 struct btrfs_root *root = BTRFS_I(inode)->root; 1115 struct btrfs_root *root = BTRFS_I(inode)->root;
781 int ret; 1116 int ret;
@@ -783,13 +1118,13 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page,
783 if (btrfs_test_opt(root, NODATACOW) || 1118 if (btrfs_test_opt(root, NODATACOW) ||
784 btrfs_test_flag(inode, NODATACOW)) 1119 btrfs_test_flag(inode, NODATACOW))
785 ret = run_delalloc_nocow(inode, locked_page, start, end, 1120 ret = run_delalloc_nocow(inode, locked_page, start, end,
786 page_started, 0); 1121 page_started, 0, nr_written);
787 else if (btrfs_test_flag(inode, PREALLOC)) 1122 else if (btrfs_test_flag(inode, PREALLOC))
788 ret = run_delalloc_nocow(inode, locked_page, start, end, 1123 ret = run_delalloc_nocow(inode, locked_page, start, end,
789 page_started, 1); 1124 page_started, 1, nr_written);
790 else 1125 else
791 ret = cow_file_range(inode, locked_page, start, end, 1126 ret = cow_file_range_async(inode, locked_page, start, end,
792 page_started); 1127 page_started, nr_written);
793 1128
794 return ret; 1129 return ret;
795} 1130}
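
run_delalloc_range now picks one of three paths per delalloc range: nocow for nodatacow mounts or inodes, nocow with force for preallocated extents, and the new cow_file_range_async otherwise. A trivial decision helper mirroring that precedence is sketched below with made-up flag names, purely for illustration:

#include <stdio.h>

enum delalloc_path { PATH_NOCOW, PATH_PREALLOC, PATH_ASYNC_COW };

struct inode_flags {
	int nodatacow;	/* mount -o nodatacow or per-inode NODATACOW */
	int prealloc;	/* range backed by preallocated extents */
};

/* same precedence as run_delalloc_range: nodatacow wins, then prealloc,
 * then the async compress/cow path */
static enum delalloc_path pick_delalloc_path(const struct inode_flags *f)
{
	if (f->nodatacow)
		return PATH_NOCOW;
	if (f->prealloc)
		return PATH_PREALLOC;
	return PATH_ASYNC_COW;
}

int main(void)
{
	struct inode_flags f = { .nodatacow = 0, .prealloc = 0 };

	printf("chosen path: %d\n", pick_delalloc_path(&f));	/* PATH_ASYNC_COW */
	return 0;
}
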
@@ -861,6 +1196,9 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
861 u64 map_length; 1196 u64 map_length;
862 int ret; 1197 int ret;
863 1198
1199 if (bio_flags & EXTENT_BIO_COMPRESSED)
1200 return 0;
1201
864 length = bio->bi_size; 1202 length = bio->bi_size;
865 map_tree = &root->fs_info->mapping_tree; 1203 map_tree = &root->fs_info->mapping_tree;
866 map_length = length; 1204 map_length = length;
@@ -925,12 +1263,12 @@ int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
 		  btrfs_test_flag(inode, NODATASUM);
 
 	if (!(rw & (1 << BIO_RW))) {
-		if (!skip_sum)
-			btrfs_lookup_bio_sums(root, inode, bio);
 
 		if (bio_flags & EXTENT_BIO_COMPRESSED)
 			return btrfs_submit_compressed_read(inode, bio,
 						    mirror_num, bio_flags);
+		else if (!skip_sum)
+			btrfs_lookup_bio_sums(root, inode, bio);
 		goto mapit;
 	} else if (!skip_sum) {
 		/* we're doing a write, do the async checksumming */
@@ -966,6 +1304,9 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans,
966 1304
967int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end) 1305int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end)
968{ 1306{
1307 if ((end & (PAGE_CACHE_SIZE - 1)) == 0) {
1308 WARN_ON(1);
1309 }
969 return set_extent_delalloc(&BTRFS_I(inode)->io_tree, start, end, 1310 return set_extent_delalloc(&BTRFS_I(inode)->io_tree, start, end,
970 GFP_NOFS); 1311 GFP_NOFS);
971} 1312}
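
The new WARN_ON in btrfs_set_extent_delalloc catches callers that pass an exclusive end: extent ranges in this code are inclusive byte offsets, so the end of a whole page is page_size * n - 1 and is never page aligned. A tiny check of that convention, with an assumed 4096-byte page size and not using any kernel API:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_CACHE_SIZE 4096ULL

/* last byte of the page containing 'offset', the inclusive-end convention
 * used by the delalloc range helpers */
static uint64_t page_end_inclusive(uint64_t offset)
{
	return offset | (PAGE_CACHE_SIZE - 1);
}

int main(void)
{
	uint64_t end = page_end_inclusive(8192);	/* third page */

	assert(end == 12287);
	/* a correct inclusive end is never page aligned, which is exactly
	 * what the WARN_ON in btrfs_set_extent_delalloc checks */
	assert((end & (PAGE_CACHE_SIZE - 1)) != 0);
	printf("inclusive end of that page: %llu\n", (unsigned long long)end);
	return 0;
}
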
@@ -2105,6 +2446,7 @@ noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
2105 int pending_del_nr = 0; 2446 int pending_del_nr = 0;
2106 int pending_del_slot = 0; 2447 int pending_del_slot = 0;
2107 int extent_type = -1; 2448 int extent_type = -1;
2449 int encoding;
2108 u64 mask = root->sectorsize - 1; 2450 u64 mask = root->sectorsize - 1;
2109 2451
2110 if (root->ref_cows) 2452 if (root->ref_cows)
@@ -2144,6 +2486,7 @@ search_again:
2144 leaf = path->nodes[0]; 2486 leaf = path->nodes[0];
2145 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 2487 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2146 found_type = btrfs_key_type(&found_key); 2488 found_type = btrfs_key_type(&found_key);
2489 encoding = 0;
2147 2490
2148 if (found_key.objectid != inode->i_ino) 2491 if (found_key.objectid != inode->i_ino)
2149 break; 2492 break;
@@ -2156,6 +2499,10 @@ search_again:
2156 fi = btrfs_item_ptr(leaf, path->slots[0], 2499 fi = btrfs_item_ptr(leaf, path->slots[0],
2157 struct btrfs_file_extent_item); 2500 struct btrfs_file_extent_item);
2158 extent_type = btrfs_file_extent_type(leaf, fi); 2501 extent_type = btrfs_file_extent_type(leaf, fi);
2502 encoding = btrfs_file_extent_compression(leaf, fi);
2503 encoding |= btrfs_file_extent_encryption(leaf, fi);
2504 encoding |= btrfs_file_extent_other_encoding(leaf, fi);
2505
2159 if (extent_type != BTRFS_FILE_EXTENT_INLINE) { 2506 if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
2160 item_end += 2507 item_end +=
2161 btrfs_file_extent_num_bytes(leaf, fi); 2508 btrfs_file_extent_num_bytes(leaf, fi);
@@ -2200,7 +2547,7 @@ search_again:
2200 if (extent_type != BTRFS_FILE_EXTENT_INLINE) { 2547 if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
2201 u64 num_dec; 2548 u64 num_dec;
2202 extent_start = btrfs_file_extent_disk_bytenr(leaf, fi); 2549 extent_start = btrfs_file_extent_disk_bytenr(leaf, fi);
2203 if (!del_item) { 2550 if (!del_item && !encoding) {
2204 u64 orig_num_bytes = 2551 u64 orig_num_bytes =
2205 btrfs_file_extent_num_bytes(leaf, fi); 2552 btrfs_file_extent_num_bytes(leaf, fi);
2206 extent_num_bytes = new_size - 2553 extent_num_bytes = new_size -
@@ -2436,7 +2783,14 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
2436 last_byte = min(extent_map_end(em), block_end); 2783 last_byte = min(extent_map_end(em), block_end);
2437 last_byte = (last_byte + mask) & ~mask; 2784 last_byte = (last_byte + mask) & ~mask;
2438 if (test_bit(EXTENT_FLAG_VACANCY, &em->flags)) { 2785 if (test_bit(EXTENT_FLAG_VACANCY, &em->flags)) {
2786 u64 hint_byte = 0;
2439 hole_size = last_byte - cur_offset; 2787 hole_size = last_byte - cur_offset;
2788 err = btrfs_drop_extents(trans, root, inode,
2789 cur_offset,
2790 cur_offset + hole_size,
2791 cur_offset, &hint_byte);
2792 if (err)
2793 break;
2440 err = btrfs_insert_file_extent(trans, root, 2794 err = btrfs_insert_file_extent(trans, root,
2441 inode->i_ino, cur_offset, 0, 2795 inode->i_ino, cur_offset, 0,
2442 0, hole_size, 0, hole_size, 2796 0, hole_size, 0, hole_size,
@@ -3785,6 +4139,7 @@ int btrfs_writepages(struct address_space *mapping,
3785 struct writeback_control *wbc) 4139 struct writeback_control *wbc)
3786{ 4140{
3787 struct extent_io_tree *tree; 4141 struct extent_io_tree *tree;
4142
3788 tree = &BTRFS_I(mapping->host)->io_tree; 4143 tree = &BTRFS_I(mapping->host)->io_tree;
3789 return extent_writepages(tree, mapping, btrfs_get_extent, wbc); 4144 return extent_writepages(tree, mapping, btrfs_get_extent, wbc);
3790} 4145}
@@ -4285,9 +4640,11 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root)
 	 * ordered extents get created before we return
 	 */
 	atomic_inc(&root->fs_info->async_submit_draining);
-	while(atomic_read(&root->fs_info->nr_async_submits)) {
+	while(atomic_read(&root->fs_info->nr_async_submits) ||
+	      atomic_read(&root->fs_info->async_delalloc_pages)) {
 		wait_event(root->fs_info->async_submit_wait,
-		   (atomic_read(&root->fs_info->nr_async_submits) == 0));
+		   (atomic_read(&root->fs_info->nr_async_submits) == 0 &&
+		    atomic_read(&root->fs_info->async_delalloc_pages) == 0));
 	}
 	atomic_dec(&root->fs_info->async_submit_draining);
 	return 0;
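
cow_file_range_async adds each chunk's page count to fs_info->async_delalloc_pages before queueing and async_cow_submit subtracts it again, so writers throttle themselves when too many pages are in flight and btrfs_start_delalloc_inodes can wait for the counter to drain to zero. The condition-variable sketch below models that counter pattern in generic pthread code; it is not the kernel's wait_event/atomic API (compile with -lpthread):

#include <pthread.h>
#include <stdio.h>

/* stand-in for fs_info->async_delalloc_pages plus async_submit_wait */
static long async_delalloc_pages;
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t wait_q = PTHREAD_COND_INITIALIZER;

static void account_pages_queued(long nr_pages)
{
	pthread_mutex_lock(&lock);
	async_delalloc_pages += nr_pages;
	pthread_mutex_unlock(&lock);
}

static void account_pages_submitted(long nr_pages)
{
	pthread_mutex_lock(&lock);
	async_delalloc_pages -= nr_pages;
	pthread_cond_broadcast(&wait_q);	/* wake throttled writers/drainers */
	pthread_mutex_unlock(&lock);
}

/* like the wait_event() calls: block until the in-flight count drops
 * below 'limit' (use limit == 1 to wait for a full drain) */
static void wait_for_delalloc_below(long limit)
{
	pthread_mutex_lock(&lock);
	while (async_delalloc_pages >= limit)
		pthread_cond_wait(&wait_q, &lock);
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	account_pages_queued(128);
	account_pages_submitted(128);
	wait_for_delalloc_below(1);	/* returns immediately, counter is 0 */
	printf("all async delalloc pages drained\n");
	return 0;
}
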