diff options
| -rw-r--r-- | fs/btrfs/Kconfig | 2 | ||||
| -rw-r--r-- | fs/btrfs/Makefile | 2 | ||||
| -rw-r--r-- | fs/btrfs/acl.c | 4 | ||||
| -rw-r--r-- | fs/btrfs/btrfs_inode.h | 2 | ||||
| -rw-r--r-- | fs/btrfs/compression.c | 329 | ||||
| -rw-r--r-- | fs/btrfs/compression.h | 72 | ||||
| -rw-r--r-- | fs/btrfs/ctree.c | 8 | ||||
| -rw-r--r-- | fs/btrfs/ctree.h | 48 | ||||
| -rw-r--r-- | fs/btrfs/disk-io.c | 412 | ||||
| -rw-r--r-- | fs/btrfs/disk-io.h | 1 | ||||
| -rw-r--r-- | fs/btrfs/extent-tree.c | 90 | ||||
| -rw-r--r-- | fs/btrfs/extent_io.c | 7 | ||||
| -rw-r--r-- | fs/btrfs/extent_io.h | 17 | ||||
| -rw-r--r-- | fs/btrfs/extent_map.c | 2 | ||||
| -rw-r--r-- | fs/btrfs/extent_map.h | 3 | ||||
| -rw-r--r-- | fs/btrfs/file.c | 13 | ||||
| -rw-r--r-- | fs/btrfs/inode.c | 90 | ||||
| -rw-r--r-- | fs/btrfs/ioctl.c | 220 | ||||
| -rw-r--r-- | fs/btrfs/ioctl.h | 12 | ||||
| -rw-r--r-- | fs/btrfs/lzo.c | 420 | ||||
| -rw-r--r-- | fs/btrfs/ordered-data.c | 18 | ||||
| -rw-r--r-- | fs/btrfs/ordered-data.h | 8 | ||||
| -rw-r--r-- | fs/btrfs/super.c | 281 | ||||
| -rw-r--r-- | fs/btrfs/transaction.c | 11 | ||||
| -rw-r--r-- | fs/btrfs/transaction.h | 1 | ||||
| -rw-r--r-- | fs/btrfs/volumes.c | 626 | ||||
| -rw-r--r-- | fs/btrfs/volumes.h | 27 | ||||
| -rw-r--r-- | fs/btrfs/xattr.c | 18 | ||||
| -rw-r--r-- | fs/btrfs/zlib.c | 369 |
29 files changed, 2490 insertions, 623 deletions
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig index 7bb3c020e570..ecb9fd3be143 100644 --- a/fs/btrfs/Kconfig +++ b/fs/btrfs/Kconfig | |||
| @@ -4,6 +4,8 @@ config BTRFS_FS | |||
| 4 | select LIBCRC32C | 4 | select LIBCRC32C |
| 5 | select ZLIB_INFLATE | 5 | select ZLIB_INFLATE |
| 6 | select ZLIB_DEFLATE | 6 | select ZLIB_DEFLATE |
| 7 | select LZO_COMPRESS | ||
| 8 | select LZO_DECOMPRESS | ||
| 7 | help | 9 | help |
| 8 | Btrfs is a new filesystem with extents, writable snapshotting, | 10 | Btrfs is a new filesystem with extents, writable snapshotting, |
| 9 | support for multiple devices and many more features. | 11 | support for multiple devices and many more features. |
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index a35eb36b32fd..31610ea73aec 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile | |||
| @@ -6,5 +6,5 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ | |||
| 6 | transaction.o inode.o file.o tree-defrag.o \ | 6 | transaction.o inode.o file.o tree-defrag.o \ |
| 7 | extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ | 7 | extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ |
| 8 | extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ | 8 | extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ |
| 9 | export.o tree-log.o acl.o free-space-cache.o zlib.o \ | 9 | export.o tree-log.o acl.o free-space-cache.o zlib.o lzo.o \ |
| 10 | compression.o delayed-ref.o relocation.o | 10 | compression.o delayed-ref.o relocation.o |
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index 6ae2c8cac9d5..15b5ca2a2606 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c | |||
| @@ -60,8 +60,10 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type) | |||
| 60 | size = __btrfs_getxattr(inode, name, value, size); | 60 | size = __btrfs_getxattr(inode, name, value, size); |
| 61 | if (size > 0) { | 61 | if (size > 0) { |
| 62 | acl = posix_acl_from_xattr(value, size); | 62 | acl = posix_acl_from_xattr(value, size); |
| 63 | if (IS_ERR(acl)) | 63 | if (IS_ERR(acl)) { |
| 64 | kfree(value); | ||
| 64 | return acl; | 65 | return acl; |
| 66 | } | ||
| 65 | set_cached_acl(inode, type, acl); | 67 | set_cached_acl(inode, type, acl); |
| 66 | } | 68 | } |
| 67 | kfree(value); | 69 | kfree(value); |
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 6ad63f17eca0..ccc991c542df 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h | |||
| @@ -157,7 +157,7 @@ struct btrfs_inode { | |||
| 157 | /* | 157 | /* |
| 158 | * always compress this one file | 158 | * always compress this one file |
| 159 | */ | 159 | */ |
| 160 | unsigned force_compress:1; | 160 | unsigned force_compress:4; |
| 161 | 161 | ||
| 162 | struct inode vfs_inode; | 162 | struct inode vfs_inode; |
| 163 | }; | 163 | }; |
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index b50bc4bd5c56..f745287fbf2e 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c | |||
| @@ -62,6 +62,9 @@ struct compressed_bio { | |||
| 62 | /* number of bytes on disk */ | 62 | /* number of bytes on disk */ |
| 63 | unsigned long compressed_len; | 63 | unsigned long compressed_len; |
| 64 | 64 | ||
| 65 | /* the compression algorithm for this bio */ | ||
| 66 | int compress_type; | ||
| 67 | |||
| 65 | /* number of compressed pages in the array */ | 68 | /* number of compressed pages in the array */ |
| 66 | unsigned long nr_pages; | 69 | unsigned long nr_pages; |
| 67 | 70 | ||
| @@ -173,11 +176,12 @@ static void end_compressed_bio_read(struct bio *bio, int err) | |||
| 173 | /* ok, we're the last bio for this extent, lets start | 176 | /* ok, we're the last bio for this extent, lets start |
| 174 | * the decompression. | 177 | * the decompression. |
| 175 | */ | 178 | */ |
| 176 | ret = btrfs_zlib_decompress_biovec(cb->compressed_pages, | 179 | ret = btrfs_decompress_biovec(cb->compress_type, |
| 177 | cb->start, | 180 | cb->compressed_pages, |
| 178 | cb->orig_bio->bi_io_vec, | 181 | cb->start, |
| 179 | cb->orig_bio->bi_vcnt, | 182 | cb->orig_bio->bi_io_vec, |
| 180 | cb->compressed_len); | 183 | cb->orig_bio->bi_vcnt, |
| 184 | cb->compressed_len); | ||
| 181 | csum_failed: | 185 | csum_failed: |
| 182 | if (ret) | 186 | if (ret) |
| 183 | cb->errors = 1; | 187 | cb->errors = 1; |
| @@ -588,6 +592,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, | |||
| 588 | 592 | ||
| 589 | cb->len = uncompressed_len; | 593 | cb->len = uncompressed_len; |
| 590 | cb->compressed_len = compressed_len; | 594 | cb->compressed_len = compressed_len; |
| 595 | cb->compress_type = extent_compress_type(bio_flags); | ||
| 591 | cb->orig_bio = bio; | 596 | cb->orig_bio = bio; |
| 592 | 597 | ||
| 593 | nr_pages = (compressed_len + PAGE_CACHE_SIZE - 1) / | 598 | nr_pages = (compressed_len + PAGE_CACHE_SIZE - 1) / |
| @@ -677,3 +682,317 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, | |||
| 677 | bio_put(comp_bio); | 682 | bio_put(comp_bio); |
| 678 | return 0; | 683 | return 0; |
| 679 | } | 684 | } |
| 685 | |||
| 686 | static struct list_head comp_idle_workspace[BTRFS_COMPRESS_TYPES]; | ||
| 687 | static spinlock_t comp_workspace_lock[BTRFS_COMPRESS_TYPES]; | ||
| 688 | static int comp_num_workspace[BTRFS_COMPRESS_TYPES]; | ||
| 689 | static atomic_t comp_alloc_workspace[BTRFS_COMPRESS_TYPES]; | ||
| 690 | static wait_queue_head_t comp_workspace_wait[BTRFS_COMPRESS_TYPES]; | ||
| 691 | |||
| 692 | struct btrfs_compress_op *btrfs_compress_op[] = { | ||
| 693 | &btrfs_zlib_compress, | ||
| 694 | &btrfs_lzo_compress, | ||
| 695 | }; | ||
| 696 | |||
| 697 | int __init btrfs_init_compress(void) | ||
| 698 | { | ||
| 699 | int i; | ||
| 700 | |||
| 701 | for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) { | ||
| 702 | INIT_LIST_HEAD(&comp_idle_workspace[i]); | ||
| 703 | spin_lock_init(&comp_workspace_lock[i]); | ||
| 704 | atomic_set(&comp_alloc_workspace[i], 0); | ||
| 705 | init_waitqueue_head(&comp_workspace_wait[i]); | ||
| 706 | } | ||
| 707 | return 0; | ||
| 708 | } | ||
| 709 | |||
| 710 | /* | ||
| 711 | * this finds an available workspace or allocates a new one | ||
| 712 | * ERR_PTR is returned if things go bad. | ||
| 713 | */ | ||
| 714 | static struct list_head *find_workspace(int type) | ||
| 715 | { | ||
| 716 | struct list_head *workspace; | ||
| 717 | int cpus = num_online_cpus(); | ||
| 718 | int idx = type - 1; | ||
| 719 | |||
| 720 | struct list_head *idle_workspace = &comp_idle_workspace[idx]; | ||
| 721 | spinlock_t *workspace_lock = &comp_workspace_lock[idx]; | ||
| 722 | atomic_t *alloc_workspace = &comp_alloc_workspace[idx]; | ||
| 723 | wait_queue_head_t *workspace_wait = &comp_workspace_wait[idx]; | ||
| 724 | int *num_workspace = &comp_num_workspace[idx]; | ||
| 725 | again: | ||
| 726 | spin_lock(workspace_lock); | ||
| 727 | if (!list_empty(idle_workspace)) { | ||
| 728 | workspace = idle_workspace->next; | ||
| 729 | list_del(workspace); | ||
| 730 | (*num_workspace)--; | ||
| 731 | spin_unlock(workspace_lock); | ||
| 732 | return workspace; | ||
| 733 | |||
| 734 | } | ||
| 735 | if (atomic_read(alloc_workspace) > cpus) { | ||
| 736 | DEFINE_WAIT(wait); | ||
| 737 | |||
| 738 | spin_unlock(workspace_lock); | ||
| 739 | prepare_to_wait(workspace_wait, &wait, TASK_UNINTERRUPTIBLE); | ||
| 740 | if (atomic_read(alloc_workspace) > cpus && !*num_workspace) | ||
| 741 | schedule(); | ||
| 742 | finish_wait(workspace_wait, &wait); | ||
| 743 | goto again; | ||
| 744 | } | ||
| 745 | atomic_inc(alloc_workspace); | ||
| 746 | spin_unlock(workspace_lock); | ||
| 747 | |||
| 748 | workspace = btrfs_compress_op[idx]->alloc_workspace(); | ||
| 749 | if (IS_ERR(workspace)) { | ||
| 750 | atomic_dec(alloc_workspace); | ||
| 751 | wake_up(workspace_wait); | ||
| 752 | } | ||
| 753 | return workspace; | ||
| 754 | } | ||
| 755 | |||
| 756 | /* | ||
| 757 | * put a workspace struct back on the list or free it if we have enough | ||
| 758 | * idle ones sitting around | ||
| 759 | */ | ||
| 760 | static void free_workspace(int type, struct list_head *workspace) | ||
| 761 | { | ||
| 762 | int idx = type - 1; | ||
| 763 | struct list_head *idle_workspace = &comp_idle_workspace[idx]; | ||
| 764 | spinlock_t *workspace_lock = &comp_workspace_lock[idx]; | ||
| 765 | atomic_t *alloc_workspace = &comp_alloc_workspace[idx]; | ||
| 766 | wait_queue_head_t *workspace_wait = &comp_workspace_wait[idx]; | ||
| 767 | int *num_workspace = &comp_num_workspace[idx]; | ||
| 768 | |||
| 769 | spin_lock(workspace_lock); | ||
| 770 | if (*num_workspace < num_online_cpus()) { | ||
| 771 | list_add_tail(workspace, idle_workspace); | ||
| 772 | (*num_workspace)++; | ||
| 773 | spin_unlock(workspace_lock); | ||
| 774 | goto wake; | ||
| 775 | } | ||
| 776 | spin_unlock(workspace_lock); | ||
| 777 | |||
| 778 | btrfs_compress_op[idx]->free_workspace(workspace); | ||
| 779 | atomic_dec(alloc_workspace); | ||
| 780 | wake: | ||
| 781 | if (waitqueue_active(workspace_wait)) | ||
| 782 | wake_up(workspace_wait); | ||
| 783 | } | ||
| 784 | |||
| 785 | /* | ||
| 786 | * cleanup function for module exit | ||
| 787 | */ | ||
| 788 | static void free_workspaces(void) | ||
| 789 | { | ||
| 790 | struct list_head *workspace; | ||
| 791 | int i; | ||
| 792 | |||
| 793 | for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) { | ||
| 794 | while (!list_empty(&comp_idle_workspace[i])) { | ||
| 795 | workspace = comp_idle_workspace[i].next; | ||
| 796 | list_del(workspace); | ||
| 797 | btrfs_compress_op[i]->free_workspace(workspace); | ||
| 798 | atomic_dec(&comp_alloc_workspace[i]); | ||
| 799 | } | ||
| 800 | } | ||
| 801 | } | ||
| 802 | |||
| 803 | /* | ||
| 804 | * given an address space and start/len, compress the bytes. | ||
| 805 | * | ||
| 806 | * pages are allocated to hold the compressed result and stored | ||
| 807 | * in 'pages' | ||
| 808 | * | ||
| 809 | * out_pages is used to return the number of pages allocated. There | ||
| 810 | * may be pages allocated even if we return an error | ||
| 811 | * | ||
| 812 | * total_in is used to return the number of bytes actually read. It | ||
| 813 | * may be smaller than len if we had to exit early because we | ||
| 814 | * ran out of room in the pages array or because we cross the | ||
| 815 | * max_out threshold. | ||
| 816 | * | ||
| 817 | * total_out is used to return the total number of compressed bytes | ||
| 818 | * | ||
| 819 | * max_out tells us the max number of bytes that we're allowed to | ||
| 820 | * stuff into pages | ||
| 821 | */ | ||
| 822 | int btrfs_compress_pages(int type, struct address_space *mapping, | ||
| 823 | u64 start, unsigned long len, | ||
| 824 | struct page **pages, | ||
| 825 | unsigned long nr_dest_pages, | ||
| 826 | unsigned long *out_pages, | ||
| 827 | unsigned long *total_in, | ||
| 828 | unsigned long *total_out, | ||
| 829 | unsigned long max_out) | ||
| 830 | { | ||
| 831 | struct list_head *workspace; | ||
| 832 | int ret; | ||
| 833 | |||
| 834 | workspace = find_workspace(type); | ||
| 835 | if (IS_ERR(workspace)) | ||
| 836 | return -1; | ||
| 837 | |||
| 838 | ret = btrfs_compress_op[type-1]->compress_pages(workspace, mapping, | ||
| 839 | start, len, pages, | ||
| 840 | nr_dest_pages, out_pages, | ||
| 841 | total_in, total_out, | ||
| 842 | max_out); | ||
| 843 | free_workspace(type, workspace); | ||
| 844 | return ret; | ||
| 845 | } | ||
| 846 | |||
| 847 | /* | ||
| 848 | * pages_in is an array of pages with compressed data. | ||
| 849 | * | ||
| 850 | * disk_start is the starting logical offset of this array in the file | ||
| 851 | * | ||
| 852 | * bvec is a bio_vec of pages from the file that we want to decompress into | ||
| 853 | * | ||
| 854 | * vcnt is the count of pages in the biovec | ||
| 855 | * | ||
| 856 | * srclen is the number of bytes in pages_in | ||
| 857 | * | ||
| 858 | * The basic idea is that we have a bio that was created by readpages. | ||
| 859 | * The pages in the bio are for the uncompressed data, and they may not | ||
| 860 | * be contiguous. They all correspond to the range of bytes covered by | ||
| 861 | * the compressed extent. | ||
| 862 | */ | ||
| 863 | int btrfs_decompress_biovec(int type, struct page **pages_in, u64 disk_start, | ||
| 864 | struct bio_vec *bvec, int vcnt, size_t srclen) | ||
| 865 | { | ||
| 866 | struct list_head *workspace; | ||
| 867 | int ret; | ||
| 868 | |||
| 869 | workspace = find_workspace(type); | ||
| 870 | if (IS_ERR(workspace)) | ||
| 871 | return -ENOMEM; | ||
| 872 | |||
| 873 | ret = btrfs_compress_op[type-1]->decompress_biovec(workspace, pages_in, | ||
| 874 | disk_start, | ||
| 875 | bvec, vcnt, srclen); | ||
| 876 | free_workspace(type, workspace); | ||
| 877 | return ret; | ||
| 878 | } | ||
| 879 | |||
| 880 | /* | ||
| 881 | * a less complex decompression routine. Our compressed data fits in a | ||
| 882 | * single page, and we want to read a single page out of it. | ||
| 883 | * start_byte tells us the offset into the compressed data we're interested in | ||
| 884 | */ | ||
| 885 | int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page, | ||
| 886 | unsigned long start_byte, size_t srclen, size_t destlen) | ||
| 887 | { | ||
| 888 | struct list_head *workspace; | ||
| 889 | int ret; | ||
| 890 | |||
| 891 | workspace = find_workspace(type); | ||
| 892 | if (IS_ERR(workspace)) | ||
| 893 | return -ENOMEM; | ||
| 894 | |||
| 895 | ret = btrfs_compress_op[type-1]->decompress(workspace, data_in, | ||
| 896 | dest_page, start_byte, | ||
| 897 | srclen, destlen); | ||
| 898 | |||
| 899 | free_workspace(type, workspace); | ||
| 900 | return ret; | ||
| 901 | } | ||
| 902 | |||
| 903 | void __exit btrfs_exit_compress(void) | ||
| 904 | { | ||
| 905 | free_workspaces(); | ||
| 906 | } | ||
| 907 | |||
| 908 | /* | ||
| 909 | * Copy uncompressed data from working buffer to pages. | ||
| 910 | * | ||
| 911 | * buf_start is the byte offset of the start of our workspace buffer. | ||
| 912 | * | ||
| 913 | * total_out is the last byte of the buffer | ||
| 914 | */ | ||
| 915 | int btrfs_decompress_buf2page(char *buf, unsigned long buf_start, | ||
| 916 | unsigned long total_out, u64 disk_start, | ||
| 917 | struct bio_vec *bvec, int vcnt, | ||
| 918 | unsigned long *page_index, | ||
| 919 | unsigned long *pg_offset) | ||
| 920 | { | ||
| 921 | unsigned long buf_offset; | ||
| 922 | unsigned long current_buf_start; | ||
| 923 | unsigned long start_byte; | ||
| 924 | unsigned long working_bytes = total_out - buf_start; | ||
| 925 | unsigned long bytes; | ||
| 926 | char *kaddr; | ||
| 927 | struct page *page_out = bvec[*page_index].bv_page; | ||
| 928 | |||
| 929 | /* | ||
| 930 | * start byte is the first byte of the page we're currently | ||
| 931 | * copying into relative to the start of the compressed data. | ||
| 932 | */ | ||
| 933 | start_byte = page_offset(page_out) - disk_start; | ||
| 934 | |||
| 935 | /* we haven't yet hit data corresponding to this page */ | ||
| 936 | if (total_out <= start_byte) | ||
| 937 | return 1; | ||
| 938 | |||
| 939 | /* | ||
| 940 | * the start of the data we care about is offset into | ||
| 941 | * the middle of our working buffer | ||
| 942 | */ | ||
| 943 | if (total_out > start_byte && buf_start < start_byte) { | ||
| 944 | buf_offset = start_byte - buf_start; | ||
| 945 | working_bytes -= buf_offset; | ||
| 946 | } else { | ||
| 947 | buf_offset = 0; | ||
| 948 | } | ||
| 949 | current_buf_start = buf_start; | ||
| 950 | |||
| 951 | /* copy bytes from the working buffer into the pages */ | ||
| 952 | while (working_bytes > 0) { | ||
| 953 | bytes = min(PAGE_CACHE_SIZE - *pg_offset, | ||
| 954 | PAGE_CACHE_SIZE - buf_offset); | ||
| 955 | bytes = min(bytes, working_bytes); | ||
| 956 | kaddr = kmap_atomic(page_out, KM_USER0); | ||
| 957 | memcpy(kaddr + *pg_offset, buf + buf_offset, bytes); | ||
| 958 | kunmap_atomic(kaddr, KM_USER0); | ||
| 959 | flush_dcache_page(page_out); | ||
| 960 | |||
| 961 | *pg_offset += bytes; | ||
| 962 | buf_offset += bytes; | ||
| 963 | working_bytes -= bytes; | ||
| 964 | current_buf_start += bytes; | ||
| 965 | |||
| 966 | /* check if we need to pick another page */ | ||
| 967 | if (*pg_offset == PAGE_CACHE_SIZE) { | ||
| 968 | (*page_index)++; | ||
| 969 | if (*page_index >= vcnt) | ||
| 970 | return 0; | ||
| 971 | |||
| 972 | page_out = bvec[*page_index].bv_page; | ||
| 973 | *pg_offset = 0; | ||
| 974 | start_byte = page_offset(page_out) - disk_start; | ||
| 975 | |||
| 976 | /* | ||
| 977 | * make sure our new page is covered by this | ||
| 978 | * working buffer | ||
| 979 | */ | ||
| 980 | if (total_out <= start_byte) | ||
| 981 | return 1; | ||
| 982 | |||
| 983 | /* | ||
| 984 | * the next page in the biovec might not be adjacent | ||
| 985 | * to the last page, but it might still be found | ||
| 986 | * inside this working buffer. bump our offset pointer | ||
| 987 | */ | ||
| 988 | if (total_out > start_byte && | ||
| 989 | current_buf_start < start_byte) { | ||
| 990 | buf_offset = start_byte - buf_start; | ||
| 991 | working_bytes = total_out - start_byte; | ||
| 992 | current_buf_start = buf_start + buf_offset; | ||
| 993 | } | ||
| 994 | } | ||
| 995 | } | ||
| 996 | |||
| 997 | return 1; | ||
| 998 | } | ||
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h index 421f5b4aa715..51000174b9d7 100644 --- a/fs/btrfs/compression.h +++ b/fs/btrfs/compression.h | |||
| @@ -19,24 +19,27 @@ | |||
| 19 | #ifndef __BTRFS_COMPRESSION_ | 19 | #ifndef __BTRFS_COMPRESSION_ |
| 20 | #define __BTRFS_COMPRESSION_ | 20 | #define __BTRFS_COMPRESSION_ |
| 21 | 21 | ||
| 22 | int btrfs_zlib_decompress(unsigned char *data_in, | 22 | int btrfs_init_compress(void); |
| 23 | struct page *dest_page, | 23 | void btrfs_exit_compress(void); |
| 24 | unsigned long start_byte, | 24 | |
| 25 | size_t srclen, size_t destlen); | 25 | int btrfs_compress_pages(int type, struct address_space *mapping, |
| 26 | int btrfs_zlib_compress_pages(struct address_space *mapping, | 26 | u64 start, unsigned long len, |
| 27 | u64 start, unsigned long len, | 27 | struct page **pages, |
| 28 | struct page **pages, | 28 | unsigned long nr_dest_pages, |
| 29 | unsigned long nr_dest_pages, | 29 | unsigned long *out_pages, |
| 30 | unsigned long *out_pages, | 30 | unsigned long *total_in, |
| 31 | unsigned long *total_in, | 31 | unsigned long *total_out, |
| 32 | unsigned long *total_out, | 32 | unsigned long max_out); |
| 33 | unsigned long max_out); | 33 | int btrfs_decompress_biovec(int type, struct page **pages_in, u64 disk_start, |
| 34 | int btrfs_zlib_decompress_biovec(struct page **pages_in, | 34 | struct bio_vec *bvec, int vcnt, size_t srclen); |
| 35 | u64 disk_start, | 35 | int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page, |
| 36 | struct bio_vec *bvec, | 36 | unsigned long start_byte, size_t srclen, size_t destlen); |
| 37 | int vcnt, | 37 | int btrfs_decompress_buf2page(char *buf, unsigned long buf_start, |
| 38 | size_t srclen); | 38 | unsigned long total_out, u64 disk_start, |
| 39 | void btrfs_zlib_exit(void); | 39 | struct bio_vec *bvec, int vcnt, |
| 40 | unsigned long *page_index, | ||
| 41 | unsigned long *pg_offset); | ||
| 42 | |||
| 40 | int btrfs_submit_compressed_write(struct inode *inode, u64 start, | 43 | int btrfs_submit_compressed_write(struct inode *inode, u64 start, |
| 41 | unsigned long len, u64 disk_start, | 44 | unsigned long len, u64 disk_start, |
| 42 | unsigned long compressed_len, | 45 | unsigned long compressed_len, |
| @@ -44,4 +47,37 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start, | |||
| 44 | unsigned long nr_pages); | 47 | unsigned long nr_pages); |
| 45 | int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, | 48 | int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, |
| 46 | int mirror_num, unsigned long bio_flags); | 49 | int mirror_num, unsigned long bio_flags); |
| 50 | |||
| 51 | struct btrfs_compress_op { | ||
| 52 | struct list_head *(*alloc_workspace)(void); | ||
| 53 | |||
| 54 | void (*free_workspace)(struct list_head *workspace); | ||
| 55 | |||
| 56 | int (*compress_pages)(struct list_head *workspace, | ||
| 57 | struct address_space *mapping, | ||
| 58 | u64 start, unsigned long len, | ||
| 59 | struct page **pages, | ||
| 60 | unsigned long nr_dest_pages, | ||
| 61 | unsigned long *out_pages, | ||
| 62 | unsigned long *total_in, | ||
| 63 | unsigned long *total_out, | ||
| 64 | unsigned long max_out); | ||
| 65 | |||
| 66 | int (*decompress_biovec)(struct list_head *workspace, | ||
| 67 | struct page **pages_in, | ||
| 68 | u64 disk_start, | ||
| 69 | struct bio_vec *bvec, | ||
| 70 | int vcnt, | ||
| 71 | size_t srclen); | ||
| 72 | |||
| 73 | int (*decompress)(struct list_head *workspace, | ||
| 74 | unsigned char *data_in, | ||
| 75 | struct page *dest_page, | ||
| 76 | unsigned long start_byte, | ||
| 77 | size_t srclen, size_t destlen); | ||
| 78 | }; | ||
| 79 | |||
| 80 | extern struct btrfs_compress_op btrfs_zlib_compress; | ||
| 81 | extern struct btrfs_compress_op btrfs_lzo_compress; | ||
| 82 | |||
| 47 | #endif | 83 | #endif |
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 9ac171599258..b5baff0dccfe 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c | |||
| @@ -105,6 +105,8 @@ noinline void btrfs_clear_path_blocking(struct btrfs_path *p, | |||
| 105 | /* this also releases the path */ | 105 | /* this also releases the path */ |
| 106 | void btrfs_free_path(struct btrfs_path *p) | 106 | void btrfs_free_path(struct btrfs_path *p) |
| 107 | { | 107 | { |
| 108 | if (!p) | ||
| 109 | return; | ||
| 108 | btrfs_release_path(NULL, p); | 110 | btrfs_release_path(NULL, p); |
| 109 | kmem_cache_free(btrfs_path_cachep, p); | 111 | kmem_cache_free(btrfs_path_cachep, p); |
| 110 | } | 112 | } |
| @@ -2514,6 +2516,9 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root | |||
| 2514 | btrfs_assert_tree_locked(path->nodes[1]); | 2516 | btrfs_assert_tree_locked(path->nodes[1]); |
| 2515 | 2517 | ||
| 2516 | right = read_node_slot(root, upper, slot + 1); | 2518 | right = read_node_slot(root, upper, slot + 1); |
| 2519 | if (right == NULL) | ||
| 2520 | return 1; | ||
| 2521 | |||
| 2517 | btrfs_tree_lock(right); | 2522 | btrfs_tree_lock(right); |
| 2518 | btrfs_set_lock_blocking(right); | 2523 | btrfs_set_lock_blocking(right); |
| 2519 | 2524 | ||
| @@ -2764,6 +2769,9 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root | |||
| 2764 | btrfs_assert_tree_locked(path->nodes[1]); | 2769 | btrfs_assert_tree_locked(path->nodes[1]); |
| 2765 | 2770 | ||
| 2766 | left = read_node_slot(root, path->nodes[1], slot - 1); | 2771 | left = read_node_slot(root, path->nodes[1], slot - 1); |
| 2772 | if (left == NULL) | ||
| 2773 | return 1; | ||
| 2774 | |||
| 2767 | btrfs_tree_lock(left); | 2775 | btrfs_tree_lock(left); |
| 2768 | btrfs_set_lock_blocking(left); | 2776 | btrfs_set_lock_blocking(left); |
| 2769 | 2777 | ||
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index b875d445ea81..2c98b3af6052 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
| @@ -295,6 +295,14 @@ static inline unsigned long btrfs_chunk_item_size(int num_stripes) | |||
| 295 | #define BTRFS_FSID_SIZE 16 | 295 | #define BTRFS_FSID_SIZE 16 |
| 296 | #define BTRFS_HEADER_FLAG_WRITTEN (1ULL << 0) | 296 | #define BTRFS_HEADER_FLAG_WRITTEN (1ULL << 0) |
| 297 | #define BTRFS_HEADER_FLAG_RELOC (1ULL << 1) | 297 | #define BTRFS_HEADER_FLAG_RELOC (1ULL << 1) |
| 298 | |||
| 299 | /* | ||
| 300 | * File system states | ||
| 301 | */ | ||
| 302 | |||
| 303 | /* Errors detected */ | ||
| 304 | #define BTRFS_SUPER_FLAG_ERROR (1ULL << 2) | ||
| 305 | |||
| 298 | #define BTRFS_SUPER_FLAG_SEEDING (1ULL << 32) | 306 | #define BTRFS_SUPER_FLAG_SEEDING (1ULL << 32) |
| 299 | #define BTRFS_SUPER_FLAG_METADUMP (1ULL << 33) | 307 | #define BTRFS_SUPER_FLAG_METADUMP (1ULL << 33) |
| 300 | 308 | ||
| @@ -399,13 +407,15 @@ struct btrfs_super_block { | |||
| 399 | #define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF (1ULL << 0) | 407 | #define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF (1ULL << 0) |
| 400 | #define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1) | 408 | #define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1) |
| 401 | #define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS (1ULL << 2) | 409 | #define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS (1ULL << 2) |
| 410 | #define BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO (1ULL << 3) | ||
| 402 | 411 | ||
| 403 | #define BTRFS_FEATURE_COMPAT_SUPP 0ULL | 412 | #define BTRFS_FEATURE_COMPAT_SUPP 0ULL |
| 404 | #define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL | 413 | #define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL |
| 405 | #define BTRFS_FEATURE_INCOMPAT_SUPP \ | 414 | #define BTRFS_FEATURE_INCOMPAT_SUPP \ |
| 406 | (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \ | 415 | (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \ |
| 407 | BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL | \ | 416 | BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL | \ |
| 408 | BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) | 417 | BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS | \ |
| 418 | BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO) | ||
| 409 | 419 | ||
| 410 | /* | 420 | /* |
| 411 | * A leaf is full of items. offset and size tell us where to find | 421 | * A leaf is full of items. offset and size tell us where to find |
| @@ -552,9 +562,11 @@ struct btrfs_timespec { | |||
| 552 | } __attribute__ ((__packed__)); | 562 | } __attribute__ ((__packed__)); |
| 553 | 563 | ||
| 554 | enum btrfs_compression_type { | 564 | enum btrfs_compression_type { |
| 555 | BTRFS_COMPRESS_NONE = 0, | 565 | BTRFS_COMPRESS_NONE = 0, |
| 556 | BTRFS_COMPRESS_ZLIB = 1, | 566 | BTRFS_COMPRESS_ZLIB = 1, |
| 557 | BTRFS_COMPRESS_LAST = 2, | 567 | BTRFS_COMPRESS_LZO = 2, |
| 568 | BTRFS_COMPRESS_TYPES = 2, | ||
| 569 | BTRFS_COMPRESS_LAST = 3, | ||
| 558 | }; | 570 | }; |
| 559 | 571 | ||
| 560 | struct btrfs_inode_item { | 572 | struct btrfs_inode_item { |
| @@ -598,6 +610,8 @@ struct btrfs_dir_item { | |||
| 598 | u8 type; | 610 | u8 type; |
| 599 | } __attribute__ ((__packed__)); | 611 | } __attribute__ ((__packed__)); |
| 600 | 612 | ||
| 613 | #define BTRFS_ROOT_SUBVOL_RDONLY (1ULL << 0) | ||
| 614 | |||
| 601 | struct btrfs_root_item { | 615 | struct btrfs_root_item { |
| 602 | struct btrfs_inode_item inode; | 616 | struct btrfs_inode_item inode; |
| 603 | __le64 generation; | 617 | __le64 generation; |
| @@ -896,7 +910,8 @@ struct btrfs_fs_info { | |||
| 896 | */ | 910 | */ |
| 897 | u64 last_trans_log_full_commit; | 911 | u64 last_trans_log_full_commit; |
| 898 | u64 open_ioctl_trans; | 912 | u64 open_ioctl_trans; |
| 899 | unsigned long mount_opt; | 913 | unsigned long mount_opt:20; |
| 914 | unsigned long compress_type:4; | ||
| 900 | u64 max_inline; | 915 | u64 max_inline; |
| 901 | u64 alloc_start; | 916 | u64 alloc_start; |
| 902 | struct btrfs_transaction *running_transaction; | 917 | struct btrfs_transaction *running_transaction; |
| @@ -1051,6 +1066,9 @@ struct btrfs_fs_info { | |||
| 1051 | unsigned metadata_ratio; | 1066 | unsigned metadata_ratio; |
| 1052 | 1067 | ||
| 1053 | void *bdev_holder; | 1068 | void *bdev_holder; |
| 1069 | |||
| 1070 | /* filesystem state */ | ||
| 1071 | u64 fs_state; | ||
| 1054 | }; | 1072 | }; |
| 1055 | 1073 | ||
| 1056 | /* | 1074 | /* |
| @@ -1894,6 +1912,11 @@ BTRFS_SETGET_STACK_FUNCS(root_limit, struct btrfs_root_item, byte_limit, 64); | |||
| 1894 | BTRFS_SETGET_STACK_FUNCS(root_last_snapshot, struct btrfs_root_item, | 1912 | BTRFS_SETGET_STACK_FUNCS(root_last_snapshot, struct btrfs_root_item, |
| 1895 | last_snapshot, 64); | 1913 | last_snapshot, 64); |
| 1896 | 1914 | ||
| 1915 | static inline bool btrfs_root_readonly(struct btrfs_root *root) | ||
| 1916 | { | ||
| 1917 | return root->root_item.flags & BTRFS_ROOT_SUBVOL_RDONLY; | ||
| 1918 | } | ||
| 1919 | |||
| 1897 | /* struct btrfs_super_block */ | 1920 | /* struct btrfs_super_block */ |
| 1898 | 1921 | ||
| 1899 | BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64); | 1922 | BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64); |
| @@ -2146,6 +2169,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
| 2146 | int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | 2169 | int btrfs_remove_block_group(struct btrfs_trans_handle *trans, |
| 2147 | struct btrfs_root *root, u64 group_start); | 2170 | struct btrfs_root *root, u64 group_start); |
| 2148 | u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags); | 2171 | u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags); |
| 2172 | u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data); | ||
| 2149 | void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde); | 2173 | void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde); |
| 2150 | void btrfs_clear_space_info_full(struct btrfs_fs_info *info); | 2174 | void btrfs_clear_space_info_full(struct btrfs_fs_info *info); |
| 2151 | int btrfs_check_data_free_space(struct inode *inode, u64 bytes); | 2175 | int btrfs_check_data_free_space(struct inode *inode, u64 bytes); |
| @@ -2189,6 +2213,12 @@ int btrfs_set_block_group_ro(struct btrfs_root *root, | |||
| 2189 | int btrfs_set_block_group_rw(struct btrfs_root *root, | 2213 | int btrfs_set_block_group_rw(struct btrfs_root *root, |
| 2190 | struct btrfs_block_group_cache *cache); | 2214 | struct btrfs_block_group_cache *cache); |
| 2191 | void btrfs_put_block_group_cache(struct btrfs_fs_info *info); | 2215 | void btrfs_put_block_group_cache(struct btrfs_fs_info *info); |
| 2216 | u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo); | ||
| 2217 | int btrfs_error_unpin_extent_range(struct btrfs_root *root, | ||
| 2218 | u64 start, u64 end); | ||
| 2219 | int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr, | ||
| 2220 | u64 num_bytes); | ||
| 2221 | |||
| 2192 | /* ctree.c */ | 2222 | /* ctree.c */ |
| 2193 | int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, | 2223 | int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, |
| 2194 | int level, int *slot); | 2224 | int level, int *slot); |
| @@ -2542,6 +2572,14 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size); | |||
| 2542 | /* super.c */ | 2572 | /* super.c */ |
| 2543 | int btrfs_parse_options(struct btrfs_root *root, char *options); | 2573 | int btrfs_parse_options(struct btrfs_root *root, char *options); |
| 2544 | int btrfs_sync_fs(struct super_block *sb, int wait); | 2574 | int btrfs_sync_fs(struct super_block *sb, int wait); |
| 2575 | void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function, | ||
| 2576 | unsigned int line, int errno); | ||
| 2577 | |||
| 2578 | #define btrfs_std_error(fs_info, errno) \ | ||
| 2579 | do { \ | ||
| 2580 | if ((errno)) \ | ||
| 2581 | __btrfs_std_error((fs_info), __func__, __LINE__, (errno));\ | ||
| 2582 | } while (0) | ||
| 2545 | 2583 | ||
| 2546 | /* acl.c */ | 2584 | /* acl.c */ |
| 2547 | #ifdef CONFIG_BTRFS_FS_POSIX_ACL | 2585 | #ifdef CONFIG_BTRFS_FS_POSIX_ACL |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 51d2e4de34eb..b531c36455d8 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
| @@ -44,6 +44,20 @@ | |||
| 44 | static struct extent_io_ops btree_extent_io_ops; | 44 | static struct extent_io_ops btree_extent_io_ops; |
| 45 | static void end_workqueue_fn(struct btrfs_work *work); | 45 | static void end_workqueue_fn(struct btrfs_work *work); |
| 46 | static void free_fs_root(struct btrfs_root *root); | 46 | static void free_fs_root(struct btrfs_root *root); |
| 47 | static void btrfs_check_super_valid(struct btrfs_fs_info *fs_info, | ||
| 48 | int read_only); | ||
| 49 | static int btrfs_destroy_ordered_operations(struct btrfs_root *root); | ||
| 50 | static int btrfs_destroy_ordered_extents(struct btrfs_root *root); | ||
| 51 | static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, | ||
| 52 | struct btrfs_root *root); | ||
| 53 | static int btrfs_destroy_pending_snapshots(struct btrfs_transaction *t); | ||
| 54 | static int btrfs_destroy_delalloc_inodes(struct btrfs_root *root); | ||
| 55 | static int btrfs_destroy_marked_extents(struct btrfs_root *root, | ||
| 56 | struct extent_io_tree *dirty_pages, | ||
| 57 | int mark); | ||
| 58 | static int btrfs_destroy_pinned_extent(struct btrfs_root *root, | ||
| 59 | struct extent_io_tree *pinned_extents); | ||
| 60 | static int btrfs_cleanup_transaction(struct btrfs_root *root); | ||
| 47 | 61 | ||
| 48 | /* | 62 | /* |
| 49 | * end_io_wq structs are used to do processing in task context when an IO is | 63 | * end_io_wq structs are used to do processing in task context when an IO is |
| @@ -353,6 +367,10 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page) | |||
| 353 | WARN_ON(len == 0); | 367 | WARN_ON(len == 0); |
| 354 | 368 | ||
| 355 | eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); | 369 | eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); |
| 370 | if (eb == NULL) { | ||
| 371 | WARN_ON(1); | ||
| 372 | goto out; | ||
| 373 | } | ||
| 356 | ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE, | 374 | ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE, |
| 357 | btrfs_header_generation(eb)); | 375 | btrfs_header_generation(eb)); |
| 358 | BUG_ON(ret); | 376 | BUG_ON(ret); |
| @@ -427,6 +445,10 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, | |||
| 427 | WARN_ON(len == 0); | 445 | WARN_ON(len == 0); |
| 428 | 446 | ||
| 429 | eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); | 447 | eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); |
| 448 | if (eb == NULL) { | ||
| 449 | ret = -EIO; | ||
| 450 | goto out; | ||
| 451 | } | ||
| 430 | 452 | ||
| 431 | found_start = btrfs_header_bytenr(eb); | 453 | found_start = btrfs_header_bytenr(eb); |
| 432 | if (found_start != start) { | 454 | if (found_start != start) { |
| @@ -1145,6 +1167,7 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, | |||
| 1145 | } | 1167 | } |
| 1146 | btrfs_free_path(path); | 1168 | btrfs_free_path(path); |
| 1147 | if (ret) { | 1169 | if (ret) { |
| 1170 | kfree(root); | ||
| 1148 | if (ret > 0) | 1171 | if (ret > 0) |
| 1149 | ret = -ENOENT; | 1172 | ret = -ENOENT; |
| 1150 | return ERR_PTR(ret); | 1173 | return ERR_PTR(ret); |
| @@ -1713,8 +1736,10 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
| 1713 | fs_info, BTRFS_ROOT_TREE_OBJECTID); | 1736 | fs_info, BTRFS_ROOT_TREE_OBJECTID); |
| 1714 | 1737 | ||
| 1715 | bh = btrfs_read_dev_super(fs_devices->latest_bdev); | 1738 | bh = btrfs_read_dev_super(fs_devices->latest_bdev); |
| 1716 | if (!bh) | 1739 | if (!bh) { |
| 1740 | err = -EINVAL; | ||
| 1717 | goto fail_iput; | 1741 | goto fail_iput; |
| 1742 | } | ||
| 1718 | 1743 | ||
| 1719 | memcpy(&fs_info->super_copy, bh->b_data, sizeof(fs_info->super_copy)); | 1744 | memcpy(&fs_info->super_copy, bh->b_data, sizeof(fs_info->super_copy)); |
| 1720 | memcpy(&fs_info->super_for_commit, &fs_info->super_copy, | 1745 | memcpy(&fs_info->super_for_commit, &fs_info->super_copy, |
| @@ -1727,6 +1752,11 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
| 1727 | if (!btrfs_super_root(disk_super)) | 1752 | if (!btrfs_super_root(disk_super)) |
| 1728 | goto fail_iput; | 1753 | goto fail_iput; |
| 1729 | 1754 | ||
| 1755 | /* check FS state, whether FS is broken. */ | ||
| 1756 | fs_info->fs_state |= btrfs_super_flags(disk_super); | ||
| 1757 | |||
| 1758 | btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY); | ||
| 1759 | |||
| 1730 | ret = btrfs_parse_options(tree_root, options); | 1760 | ret = btrfs_parse_options(tree_root, options); |
| 1731 | if (ret) { | 1761 | if (ret) { |
| 1732 | err = ret; | 1762 | err = ret; |
| @@ -1744,10 +1774,10 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
| 1744 | } | 1774 | } |
| 1745 | 1775 | ||
| 1746 | features = btrfs_super_incompat_flags(disk_super); | 1776 | features = btrfs_super_incompat_flags(disk_super); |
| 1747 | if (!(features & BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF)) { | 1777 | features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF; |
| 1748 | features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF; | 1778 | if (tree_root->fs_info->compress_type & BTRFS_COMPRESS_LZO) |
| 1749 | btrfs_set_super_incompat_flags(disk_super, features); | 1779 | features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; |
| 1750 | } | 1780 | btrfs_set_super_incompat_flags(disk_super, features); |
| 1751 | 1781 | ||
| 1752 | features = btrfs_super_compat_ro_flags(disk_super) & | 1782 | features = btrfs_super_compat_ro_flags(disk_super) & |
| 1753 | ~BTRFS_FEATURE_COMPAT_RO_SUPP; | 1783 | ~BTRFS_FEATURE_COMPAT_RO_SUPP; |
| @@ -1957,7 +1987,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
| 1957 | btrfs_set_opt(fs_info->mount_opt, SSD); | 1987 | btrfs_set_opt(fs_info->mount_opt, SSD); |
| 1958 | } | 1988 | } |
| 1959 | 1989 | ||
| 1960 | if (btrfs_super_log_root(disk_super) != 0) { | 1990 | /* do not make disk changes in broken FS */ |
| 1991 | if (btrfs_super_log_root(disk_super) != 0 && | ||
| 1992 | !(fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)) { | ||
| 1961 | u64 bytenr = btrfs_super_log_root(disk_super); | 1993 | u64 bytenr = btrfs_super_log_root(disk_super); |
| 1962 | 1994 | ||
| 1963 | if (fs_devices->rw_devices == 0) { | 1995 | if (fs_devices->rw_devices == 0) { |
| @@ -2442,8 +2474,28 @@ int close_ctree(struct btrfs_root *root) | |||
| 2442 | smp_mb(); | 2474 | smp_mb(); |
| 2443 | 2475 | ||
| 2444 | btrfs_put_block_group_cache(fs_info); | 2476 | btrfs_put_block_group_cache(fs_info); |
| 2477 | |||
| 2478 | /* | ||
| 2479 | * Here come 2 situations when btrfs is broken to flip readonly: | ||
| 2480 | * | ||
| 2481 | * 1. when btrfs flips readonly somewhere else before | ||
| 2482 | * btrfs_commit_super, sb->s_flags has MS_RDONLY flag, | ||
| 2483 | * and btrfs will skip to write sb directly to keep | ||
| 2484 | * ERROR state on disk. | ||
| 2485 | * | ||
| 2486 | * 2. when btrfs flips readonly just in btrfs_commit_super, | ||
| 2487 | * and in such case, btrfs cannot write sb via btrfs_commit_super, | ||
| 2488 | * and since fs_state has been set BTRFS_SUPER_FLAG_ERROR flag, | ||
| 2489 | * btrfs will cleanup all FS resources first and write sb then. | ||
| 2490 | */ | ||
| 2445 | if (!(fs_info->sb->s_flags & MS_RDONLY)) { | 2491 | if (!(fs_info->sb->s_flags & MS_RDONLY)) { |
| 2446 | ret = btrfs_commit_super(root); | 2492 | ret = btrfs_commit_super(root); |
| 2493 | if (ret) | ||
| 2494 | printk(KERN_ERR "btrfs: commit super ret %d\n", ret); | ||
| 2495 | } | ||
| 2496 | |||
| 2497 | if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { | ||
| 2498 | ret = btrfs_error_commit_super(root); | ||
| 2447 | if (ret) | 2499 | if (ret) |
| 2448 | printk(KERN_ERR "btrfs: commit super ret %d\n", ret); | 2500 | printk(KERN_ERR "btrfs: commit super ret %d\n", ret); |
| 2449 | } | 2501 | } |
| @@ -2619,6 +2671,352 @@ out: | |||
| 2619 | return 0; | 2671 | return 0; |
| 2620 | } | 2672 | } |
| 2621 | 2673 | ||
| 2674 | static void btrfs_check_super_valid(struct btrfs_fs_info *fs_info, | ||
| 2675 | int read_only) | ||
| 2676 | { | ||
| 2677 | if (read_only) | ||
| 2678 | return; | ||
| 2679 | |||
| 2680 | if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) | ||
| 2681 | printk(KERN_WARNING "warning: mount fs with errors, " | ||
| 2682 | "running btrfsck is recommended\n"); | ||
| 2683 | } | ||
| 2684 | |||
| 2685 | int btrfs_error_commit_super(struct btrfs_root *root) | ||
| 2686 | { | ||
| 2687 | int ret; | ||
| 2688 | |||
| 2689 | mutex_lock(&root->fs_info->cleaner_mutex); | ||
| 2690 | btrfs_run_delayed_iputs(root); | ||
| 2691 | mutex_unlock(&root->fs_info->cleaner_mutex); | ||
| 2692 | |||
| 2693 | down_write(&root->fs_info->cleanup_work_sem); | ||
| 2694 | up_write(&root->fs_info->cleanup_work_sem); | ||
| 2695 | |||
| 2696 | /* cleanup FS via transaction */ | ||
| 2697 | btrfs_cleanup_transaction(root); | ||
| 2698 | |||
| 2699 | ret = write_ctree_super(NULL, root, 0); | ||
| 2700 | |||
| 2701 | return ret; | ||
| 2702 | } | ||
| 2703 | |||
| 2704 | static int btrfs_destroy_ordered_operations(struct btrfs_root *root) | ||
| 2705 | { | ||
| 2706 | struct btrfs_inode *btrfs_inode; | ||
| 2707 | struct list_head splice; | ||
| 2708 | |||
| 2709 | INIT_LIST_HEAD(&splice); | ||
| 2710 | |||
| 2711 | mutex_lock(&root->fs_info->ordered_operations_mutex); | ||
| 2712 | spin_lock(&root->fs_info->ordered_extent_lock); | ||
| 2713 | |||
| 2714 | list_splice_init(&root->fs_info->ordered_operations, &splice); | ||
| 2715 | while (!list_empty(&splice)) { | ||
| 2716 | btrfs_inode = list_entry(splice.next, struct btrfs_inode, | ||
| 2717 | ordered_operations); | ||
| 2718 | |||
| 2719 | list_del_init(&btrfs_inode->ordered_operations); | ||
| 2720 | |||
| 2721 | btrfs_invalidate_inodes(btrfs_inode->root); | ||
| 2722 | } | ||
| 2723 | |||
| 2724 | spin_unlock(&root->fs_info->ordered_extent_lock); | ||
| 2725 | mutex_unlock(&root->fs_info->ordered_operations_mutex); | ||
| 2726 | |||
| 2727 | return 0; | ||
| 2728 | } | ||
| 2729 | |||
| 2730 | static int btrfs_destroy_ordered_extents(struct btrfs_root *root) | ||
| 2731 | { | ||
| 2732 | struct list_head splice; | ||
| 2733 | struct btrfs_ordered_extent *ordered; | ||
| 2734 | struct inode *inode; | ||
| 2735 | |||
| 2736 | INIT_LIST_HEAD(&splice); | ||
| 2737 | |||
| 2738 | spin_lock(&root->fs_info->ordered_extent_lock); | ||
| 2739 | |||
| 2740 | list_splice_init(&root->fs_info->ordered_extents, &splice); | ||
| 2741 | while (!list_empty(&splice)) { | ||
| 2742 | ordered = list_entry(splice.next, struct btrfs_ordered_extent, | ||
| 2743 | root_extent_list); | ||
| 2744 | |||
| 2745 | list_del_init(&ordered->root_extent_list); | ||
| 2746 | atomic_inc(&ordered->refs); | ||
| 2747 | |||
| 2748 | /* the inode may be getting freed (in sys_unlink path). */ | ||
| 2749 | inode = igrab(ordered->inode); | ||
| 2750 | |||
| 2751 | spin_unlock(&root->fs_info->ordered_extent_lock); | ||
| 2752 | if (inode) | ||
| 2753 | iput(inode); | ||
| 2754 | |||
| 2755 | atomic_set(&ordered->refs, 1); | ||
| 2756 | btrfs_put_ordered_extent(ordered); | ||
| 2757 | |||
| 2758 | spin_lock(&root->fs_info->ordered_extent_lock); | ||
| 2759 | } | ||
| 2760 | |||
| 2761 | spin_unlock(&root->fs_info->ordered_extent_lock); | ||
| 2762 | |||
| 2763 | return 0; | ||
| 2764 | } | ||
| 2765 | |||
| 2766 | static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, | ||
| 2767 | struct btrfs_root *root) | ||
| 2768 | { | ||
| 2769 | struct rb_node *node; | ||
| 2770 | struct btrfs_delayed_ref_root *delayed_refs; | ||
| 2771 | struct btrfs_delayed_ref_node *ref; | ||
| 2772 | int ret = 0; | ||
| 2773 | |||
| 2774 | delayed_refs = &trans->delayed_refs; | ||
| 2775 | |||
| 2776 | spin_lock(&delayed_refs->lock); | ||
| 2777 | if (delayed_refs->num_entries == 0) { | ||
| 2778 | printk(KERN_INFO "delayed_refs has NO entry\n"); | ||
| 2779 | return ret; | ||
| 2780 | } | ||
| 2781 | |||
| 2782 | node = rb_first(&delayed_refs->root); | ||
| 2783 | while (node) { | ||
| 2784 | ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); | ||
| 2785 | node = rb_next(node); | ||
| 2786 | |||
| 2787 | ref->in_tree = 0; | ||
| 2788 | rb_erase(&ref->rb_node, &delayed_refs->root); | ||
| 2789 | delayed_refs->num_entries--; | ||
| 2790 | |||
| 2791 | atomic_set(&ref->refs, 1); | ||
| 2792 | if (btrfs_delayed_ref_is_head(ref)) { | ||
| 2793 | struct btrfs_delayed_ref_head *head; | ||
| 2794 | |||
| 2795 | head = btrfs_delayed_node_to_head(ref); | ||
| 2796 | mutex_lock(&head->mutex); | ||
| 2797 | kfree(head->extent_op); | ||
| 2798 | delayed_refs->num_heads--; | ||
| 2799 | if (list_empty(&head->cluster)) | ||
| 2800 | delayed_refs->num_heads_ready--; | ||
| 2801 | list_del_init(&head->cluster); | ||
| 2802 | mutex_unlock(&head->mutex); | ||
| 2803 | } | ||
| 2804 | |||
| 2805 | spin_unlock(&delayed_refs->lock); | ||
| 2806 | btrfs_put_delayed_ref(ref); | ||
| 2807 | |||
| 2808 | cond_resched(); | ||
| 2809 | spin_lock(&delayed_refs->lock); | ||
| 2810 | } | ||
| 2811 | |||
| 2812 | spin_unlock(&delayed_refs->lock); | ||
| 2813 | |||
| 2814 | return ret; | ||
| 2815 | } | ||
| 2816 | |||
| 2817 | static int btrfs_destroy_pending_snapshots(struct btrfs_transaction *t) | ||
| 2818 | { | ||
| 2819 | struct btrfs_pending_snapshot *snapshot; | ||
| 2820 | struct list_head splice; | ||
| 2821 | |||
| 2822 | INIT_LIST_HEAD(&splice); | ||
| 2823 | |||
| 2824 | list_splice_init(&t->pending_snapshots, &splice); | ||
| 2825 | |||
| 2826 | while (!list_empty(&splice)) { | ||
| 2827 | snapshot = list_entry(splice.next, | ||
| 2828 | struct btrfs_pending_snapshot, | ||
| 2829 | list); | ||
| 2830 | |||
| 2831 | list_del_init(&snapshot->list); | ||
| 2832 | |||
| 2833 | kfree(snapshot); | ||
| 2834 | } | ||
| 2835 | |||
| 2836 | return 0; | ||
| 2837 | } | ||
| 2838 | |||
| 2839 | static int btrfs_destroy_delalloc_inodes(struct btrfs_root *root) | ||
| 2840 | { | ||
| 2841 | struct btrfs_inode *btrfs_inode; | ||
| 2842 | struct list_head splice; | ||
| 2843 | |||
| 2844 | INIT_LIST_HEAD(&splice); | ||
| 2845 | |||
| 2846 | list_splice_init(&root->fs_info->delalloc_inodes, &splice); | ||
| 2847 | |||
| 2848 | spin_lock(&root->fs_info->delalloc_lock); | ||
| 2849 | |||
| 2850 | while (!list_empty(&splice)) { | ||
| 2851 | btrfs_inode = list_entry(splice.next, struct btrfs_inode, | ||
| 2852 | delalloc_inodes); | ||
| 2853 | |||
| 2854 | list_del_init(&btrfs_inode->delalloc_inodes); | ||
| 2855 | |||
| 2856 | btrfs_invalidate_inodes(btrfs_inode->root); | ||
| 2857 | } | ||
| 2858 | |||
| 2859 | spin_unlock(&root->fs_info->delalloc_lock); | ||
| 2860 | |||
| 2861 | return 0; | ||
| 2862 | } | ||
| 2863 | |||
| 2864 | static int btrfs_destroy_marked_extents(struct btrfs_root *root, | ||
| 2865 | struct extent_io_tree *dirty_pages, | ||
| 2866 | int mark) | ||
| 2867 | { | ||
| 2868 | int ret; | ||
| 2869 | struct page *page; | ||
| 2870 | struct inode *btree_inode = root->fs_info->btree_inode; | ||
| 2871 | struct extent_buffer *eb; | ||
| 2872 | u64 start = 0; | ||
| 2873 | u64 end; | ||
| 2874 | u64 offset; | ||
| 2875 | unsigned long index; | ||
| 2876 | |||
| 2877 | while (1) { | ||
| 2878 | ret = find_first_extent_bit(dirty_pages, start, &start, &end, | ||
| 2879 | mark); | ||
| 2880 | if (ret) | ||
| 2881 | break; | ||
| 2882 | |||
| 2883 | clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS); | ||
| 2884 | while (start <= end) { | ||
| 2885 | index = start >> PAGE_CACHE_SHIFT; | ||
| 2886 | start = (u64)(index + 1) << PAGE_CACHE_SHIFT; | ||
| 2887 | page = find_get_page(btree_inode->i_mapping, index); | ||
| 2888 | if (!page) | ||
| 2889 | continue; | ||
| 2890 | offset = page_offset(page); | ||
| 2891 | |||
| 2892 | spin_lock(&dirty_pages->buffer_lock); | ||
| 2893 | eb = radix_tree_lookup( | ||
| 2894 | &(&BTRFS_I(page->mapping->host)->io_tree)->buffer, | ||
| 2895 | offset >> PAGE_CACHE_SHIFT); | ||
| 2896 | spin_unlock(&dirty_pages->buffer_lock); | ||
| 2897 | if (eb) { | ||
| 2898 | ret = test_and_clear_bit(EXTENT_BUFFER_DIRTY, | ||
| 2899 | &eb->bflags); | ||
| 2900 | atomic_set(&eb->refs, 1); | ||
| 2901 | } | ||
| 2902 | if (PageWriteback(page)) | ||
| 2903 | end_page_writeback(page); | ||
| 2904 | |||
| 2905 | lock_page(page); | ||
| 2906 | if (PageDirty(page)) { | ||
| 2907 | clear_page_dirty_for_io(page); | ||
| 2908 | spin_lock_irq(&page->mapping->tree_lock); | ||
| 2909 | radix_tree_tag_clear(&page->mapping->page_tree, | ||
| 2910 | page_index(page), | ||
| 2911 | PAGECACHE_TAG_DIRTY); | ||
| 2912 | spin_unlock_irq(&page->mapping->tree_lock); | ||
| 2913 | } | ||
| 2914 | |||
| 2915 | page->mapping->a_ops->invalidatepage(page, 0); | ||
| 2916 | unlock_page(page); | ||
| 2917 | } | ||
| 2918 | } | ||
| 2919 | |||
| 2920 | return ret; | ||
| 2921 | } | ||
| 2922 | |||
| 2923 | static int btrfs_destroy_pinned_extent(struct btrfs_root *root, | ||
| 2924 | struct extent_io_tree *pinned_extents) | ||
| 2925 | { | ||
| 2926 | struct extent_io_tree *unpin; | ||
| 2927 | u64 start; | ||
| 2928 | u64 end; | ||
| 2929 | int ret; | ||
| 2930 | |||
| 2931 | unpin = pinned_extents; | ||
| 2932 | while (1) { | ||
| 2933 | ret = find_first_extent_bit(unpin, 0, &start, &end, | ||
| 2934 | EXTENT_DIRTY); | ||
| 2935 | if (ret) | ||
| 2936 | break; | ||
| 2937 | |||
| 2938 | /* opt_discard */ | ||
| 2939 | ret = btrfs_error_discard_extent(root, start, end + 1 - start); | ||
| 2940 | |||
| 2941 | clear_extent_dirty(unpin, start, end, GFP_NOFS); | ||
| 2942 | btrfs_error_unpin_extent_range(root, start, end); | ||
| 2943 | cond_resched(); | ||
| 2944 | } | ||
| 2945 | |||
| 2946 | return 0; | ||
| 2947 | } | ||
| 2948 | |||
| 2949 | static int btrfs_cleanup_transaction(struct btrfs_root *root) | ||
| 2950 | { | ||
| 2951 | struct btrfs_transaction *t; | ||
| 2952 | LIST_HEAD(list); | ||
| 2953 | |||
| 2954 | WARN_ON(1); | ||
| 2955 | |||
| 2956 | mutex_lock(&root->fs_info->trans_mutex); | ||
| 2957 | mutex_lock(&root->fs_info->transaction_kthread_mutex); | ||
| 2958 | |||
| 2959 | list_splice_init(&root->fs_info->trans_list, &list); | ||
| 2960 | while (!list_empty(&list)) { | ||
| 2961 | t = list_entry(list.next, struct btrfs_transaction, list); | ||
| 2962 | if (!t) | ||
| 2963 | break; | ||
| 2964 | |||
| 2965 | btrfs_destroy_ordered_operations(root); | ||
| 2966 | |||
| 2967 | btrfs_destroy_ordered_extents(root); | ||
| 2968 | |||
| 2969 | btrfs_destroy_delayed_refs(t, root); | ||
| 2970 | |||
| 2971 | btrfs_block_rsv_release(root, | ||
| 2972 | &root->fs_info->trans_block_rsv, | ||
| 2973 | t->dirty_pages.dirty_bytes); | ||
| 2974 | |||
| 2975 | /* FIXME: cleanup wait for commit */ | ||
| 2976 | t->in_commit = 1; | ||
| 2977 | t->blocked = 1; | ||
| 2978 | if (waitqueue_active(&root->fs_info->transaction_blocked_wait)) | ||
| 2979 | wake_up(&root->fs_info->transaction_blocked_wait); | ||
| 2980 | |||
| 2981 | t->blocked = 0; | ||
| 2982 | if (waitqueue_active(&root->fs_info->transaction_wait)) | ||
| 2983 | wake_up(&root->fs_info->transaction_wait); | ||
| 2984 | mutex_unlock(&root->fs_info->trans_mutex); | ||
| 2985 | |||
| 2986 | mutex_lock(&root->fs_info->trans_mutex); | ||
| 2987 | t->commit_done = 1; | ||
| 2988 | if (waitqueue_active(&t->commit_wait)) | ||
| 2989 | wake_up(&t->commit_wait); | ||
| 2990 | mutex_unlock(&root->fs_info->trans_mutex); | ||
| 2991 | |||
| 2992 | mutex_lock(&root->fs_info->trans_mutex); | ||
| 2993 | |||
| 2994 | btrfs_destroy_pending_snapshots(t); | ||
| 2995 | |||
| 2996 | btrfs_destroy_delalloc_inodes(root); | ||
| 2997 | |||
| 2998 | spin_lock(&root->fs_info->new_trans_lock); | ||
| 2999 | root->fs_info->running_transaction = NULL; | ||
| 3000 | spin_unlock(&root->fs_info->new_trans_lock); | ||
| 3001 | |||
| 3002 | btrfs_destroy_marked_extents(root, &t->dirty_pages, | ||
| 3003 | EXTENT_DIRTY); | ||
| 3004 | |||
| 3005 | btrfs_destroy_pinned_extent(root, | ||
| 3006 | root->fs_info->pinned_extents); | ||
| 3007 | |||
| 3008 | t->use_count = 0; | ||
| 3009 | list_del_init(&t->list); | ||
| 3010 | memset(t, 0, sizeof(*t)); | ||
| 3011 | kmem_cache_free(btrfs_transaction_cachep, t); | ||
| 3012 | } | ||
| 3013 | |||
| 3014 | mutex_unlock(&root->fs_info->transaction_kthread_mutex); | ||
| 3015 | mutex_unlock(&root->fs_info->trans_mutex); | ||
| 3016 | |||
| 3017 | return 0; | ||
| 3018 | } | ||
| 3019 | |||
| 2622 | static struct extent_io_ops btree_extent_io_ops = { | 3020 | static struct extent_io_ops btree_extent_io_ops = { |
| 2623 | .write_cache_pages_lock_hook = btree_lock_page_hook, | 3021 | .write_cache_pages_lock_hook = btree_lock_page_hook, |
| 2624 | .readpage_end_io_hook = btree_readpage_end_io_hook, | 3022 | .readpage_end_io_hook = btree_readpage_end_io_hook, |
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 88e825a0bf21..07b20dc2fd95 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h | |||
| @@ -52,6 +52,7 @@ int write_ctree_super(struct btrfs_trans_handle *trans, | |||
| 52 | struct btrfs_root *root, int max_mirrors); | 52 | struct btrfs_root *root, int max_mirrors); |
| 53 | struct buffer_head *btrfs_read_dev_super(struct block_device *bdev); | 53 | struct buffer_head *btrfs_read_dev_super(struct block_device *bdev); |
| 54 | int btrfs_commit_super(struct btrfs_root *root); | 54 | int btrfs_commit_super(struct btrfs_root *root); |
| 55 | int btrfs_error_commit_super(struct btrfs_root *root); | ||
| 55 | struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, | 56 | struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, |
| 56 | u64 bytenr, u32 blocksize); | 57 | u64 bytenr, u32 blocksize); |
| 57 | struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, | 58 | struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 227e5815d838..b55269340cec 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
| @@ -3089,7 +3089,7 @@ static u64 get_alloc_profile(struct btrfs_root *root, u64 flags) | |||
| 3089 | return btrfs_reduce_alloc_profile(root, flags); | 3089 | return btrfs_reduce_alloc_profile(root, flags); |
| 3090 | } | 3090 | } |
| 3091 | 3091 | ||
| 3092 | static u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data) | 3092 | u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data) |
| 3093 | { | 3093 | { |
| 3094 | u64 flags; | 3094 | u64 flags; |
| 3095 | 3095 | ||
| @@ -3161,8 +3161,12 @@ alloc: | |||
| 3161 | bytes + 2 * 1024 * 1024, | 3161 | bytes + 2 * 1024 * 1024, |
| 3162 | alloc_target, 0); | 3162 | alloc_target, 0); |
| 3163 | btrfs_end_transaction(trans, root); | 3163 | btrfs_end_transaction(trans, root); |
| 3164 | if (ret < 0) | 3164 | if (ret < 0) { |
| 3165 | return ret; | 3165 | if (ret != -ENOSPC) |
| 3166 | return ret; | ||
| 3167 | else | ||
| 3168 | goto commit_trans; | ||
| 3169 | } | ||
| 3166 | 3170 | ||
| 3167 | if (!data_sinfo) { | 3171 | if (!data_sinfo) { |
| 3168 | btrfs_set_inode_space_info(root, inode); | 3172 | btrfs_set_inode_space_info(root, inode); |
| @@ -3173,6 +3177,7 @@ alloc: | |||
| 3173 | spin_unlock(&data_sinfo->lock); | 3177 | spin_unlock(&data_sinfo->lock); |
| 3174 | 3178 | ||
| 3175 | /* commit the current transaction and try again */ | 3179 | /* commit the current transaction and try again */ |
| 3180 | commit_trans: | ||
| 3176 | if (!committed && !root->fs_info->open_ioctl_trans) { | 3181 | if (!committed && !root->fs_info->open_ioctl_trans) { |
| 3177 | committed = 1; | 3182 | committed = 1; |
| 3178 | trans = btrfs_join_transaction(root, 1); | 3183 | trans = btrfs_join_transaction(root, 1); |
| @@ -3721,11 +3726,6 @@ int btrfs_block_rsv_check(struct btrfs_trans_handle *trans, | |||
| 3721 | return 0; | 3726 | return 0; |
| 3722 | } | 3727 | } |
| 3723 | 3728 | ||
| 3724 | WARN_ON(1); | ||
| 3725 | printk(KERN_INFO"block_rsv size %llu reserved %llu freed %llu %llu\n", | ||
| 3726 | block_rsv->size, block_rsv->reserved, | ||
| 3727 | block_rsv->freed[0], block_rsv->freed[1]); | ||
| 3728 | |||
| 3729 | return -ENOSPC; | 3729 | return -ENOSPC; |
| 3730 | } | 3730 | } |
| 3731 | 3731 | ||
| @@ -7970,13 +7970,14 @@ static int set_block_group_ro(struct btrfs_block_group_cache *cache) | |||
| 7970 | 7970 | ||
| 7971 | if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned + | 7971 | if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned + |
| 7972 | sinfo->bytes_may_use + sinfo->bytes_readonly + | 7972 | sinfo->bytes_may_use + sinfo->bytes_readonly + |
| 7973 | cache->reserved_pinned + num_bytes < sinfo->total_bytes) { | 7973 | cache->reserved_pinned + num_bytes <= sinfo->total_bytes) { |
| 7974 | sinfo->bytes_readonly += num_bytes; | 7974 | sinfo->bytes_readonly += num_bytes; |
| 7975 | sinfo->bytes_reserved += cache->reserved_pinned; | 7975 | sinfo->bytes_reserved += cache->reserved_pinned; |
| 7976 | cache->reserved_pinned = 0; | 7976 | cache->reserved_pinned = 0; |
| 7977 | cache->ro = 1; | 7977 | cache->ro = 1; |
| 7978 | ret = 0; | 7978 | ret = 0; |
| 7979 | } | 7979 | } |
| 7980 | |||
| 7980 | spin_unlock(&cache->lock); | 7981 | spin_unlock(&cache->lock); |
| 7981 | spin_unlock(&sinfo->lock); | 7982 | spin_unlock(&sinfo->lock); |
| 7982 | return ret; | 7983 | return ret; |
| @@ -8012,6 +8013,62 @@ out: | |||
| 8012 | return ret; | 8013 | return ret; |
| 8013 | } | 8014 | } |
| 8014 | 8015 | ||
| 8016 | /* | ||
| 8017 | * helper to account the unused space of all the readonly block groups in the | ||
| 8018 | * list. takes mirrors into account. | ||
| 8019 | */ | ||
| 8020 | static u64 __btrfs_get_ro_block_group_free_space(struct list_head *groups_list) | ||
| 8021 | { | ||
| 8022 | struct btrfs_block_group_cache *block_group; | ||
| 8023 | u64 free_bytes = 0; | ||
| 8024 | int factor; | ||
| 8025 | |||
| 8026 | list_for_each_entry(block_group, groups_list, list) { | ||
| 8027 | spin_lock(&block_group->lock); | ||
| 8028 | |||
| 8029 | if (!block_group->ro) { | ||
| 8030 | spin_unlock(&block_group->lock); | ||
| 8031 | continue; | ||
| 8032 | } | ||
| 8033 | |||
| 8034 | if (block_group->flags & (BTRFS_BLOCK_GROUP_RAID1 | | ||
| 8035 | BTRFS_BLOCK_GROUP_RAID10 | | ||
| 8036 | BTRFS_BLOCK_GROUP_DUP)) | ||
| 8037 | factor = 2; | ||
| 8038 | else | ||
| 8039 | factor = 1; | ||
| 8040 | |||
| 8041 | free_bytes += (block_group->key.offset - | ||
| 8042 | btrfs_block_group_used(&block_group->item)) * | ||
| 8043 | factor; | ||
| 8044 | |||
| 8045 | spin_unlock(&block_group->lock); | ||
| 8046 | } | ||
| 8047 | |||
| 8048 | return free_bytes; | ||
| 8049 | } | ||
| 8050 | |||
| 8051 | /* | ||
| 8052 | * helper to account the unused space of all the readonly block groups in the | ||
| 8053 | * space_info. takes mirrors into account. | ||
| 8054 | */ | ||
| 8055 | u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo) | ||
| 8056 | { | ||
| 8057 | int i; | ||
| 8058 | u64 free_bytes = 0; | ||
| 8059 | |||
| 8060 | spin_lock(&sinfo->lock); | ||
| 8061 | |||
| 8062 | for(i = 0; i < BTRFS_NR_RAID_TYPES; i++) | ||
| 8063 | if (!list_empty(&sinfo->block_groups[i])) | ||
| 8064 | free_bytes += __btrfs_get_ro_block_group_free_space( | ||
| 8065 | &sinfo->block_groups[i]); | ||
| 8066 | |||
| 8067 | spin_unlock(&sinfo->lock); | ||
| 8068 | |||
| 8069 | return free_bytes; | ||
| 8070 | } | ||
| 8071 | |||
| 8015 | int btrfs_set_block_group_rw(struct btrfs_root *root, | 8072 | int btrfs_set_block_group_rw(struct btrfs_root *root, |
| 8016 | struct btrfs_block_group_cache *cache) | 8073 | struct btrfs_block_group_cache *cache) |
| 8017 | { | 8074 | { |
| @@ -8092,7 +8149,7 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) | |||
| 8092 | mutex_lock(&root->fs_info->chunk_mutex); | 8149 | mutex_lock(&root->fs_info->chunk_mutex); |
| 8093 | list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { | 8150 | list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { |
| 8094 | u64 min_free = btrfs_block_group_used(&block_group->item); | 8151 | u64 min_free = btrfs_block_group_used(&block_group->item); |
| 8095 | u64 dev_offset, max_avail; | 8152 | u64 dev_offset; |
| 8096 | 8153 | ||
| 8097 | /* | 8154 | /* |
| 8098 | * check to make sure we can actually find a chunk with enough | 8155 | * check to make sure we can actually find a chunk with enough |
| @@ -8100,7 +8157,7 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) | |||
| 8100 | */ | 8157 | */ |
| 8101 | if (device->total_bytes > device->bytes_used + min_free) { | 8158 | if (device->total_bytes > device->bytes_used + min_free) { |
| 8102 | ret = find_free_dev_extent(NULL, device, min_free, | 8159 | ret = find_free_dev_extent(NULL, device, min_free, |
| 8103 | &dev_offset, &max_avail); | 8160 | &dev_offset, NULL); |
| 8104 | if (!ret) | 8161 | if (!ret) |
| 8105 | break; | 8162 | break; |
| 8106 | ret = -1; | 8163 | ret = -1; |
| @@ -8584,3 +8641,14 @@ out: | |||
| 8584 | btrfs_free_path(path); | 8641 | btrfs_free_path(path); |
| 8585 | return ret; | 8642 | return ret; |
| 8586 | } | 8643 | } |
| 8644 | |||
| 8645 | int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) | ||
| 8646 | { | ||
| 8647 | return unpin_extent_range(root, start, end); | ||
| 8648 | } | ||
| 8649 | |||
| 8650 | int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr, | ||
| 8651 | u64 num_bytes) | ||
| 8652 | { | ||
| 8653 | return btrfs_discard_extent(root, bytenr, num_bytes); | ||
| 8654 | } | ||
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 3e86b9f36507..2e993cf1766e 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
| @@ -2028,8 +2028,11 @@ static int __extent_read_full_page(struct extent_io_tree *tree, | |||
| 2028 | BUG_ON(extent_map_end(em) <= cur); | 2028 | BUG_ON(extent_map_end(em) <= cur); |
| 2029 | BUG_ON(end < cur); | 2029 | BUG_ON(end < cur); |
| 2030 | 2030 | ||
| 2031 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) | 2031 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { |
| 2032 | this_bio_flag = EXTENT_BIO_COMPRESSED; | 2032 | this_bio_flag = EXTENT_BIO_COMPRESSED; |
| 2033 | extent_set_compress_type(&this_bio_flag, | ||
| 2034 | em->compress_type); | ||
| 2035 | } | ||
| 2033 | 2036 | ||
| 2034 | iosize = min(extent_map_end(em) - cur, end - cur + 1); | 2037 | iosize = min(extent_map_end(em) - cur, end - cur + 1); |
| 2035 | cur_end = min(extent_map_end(em) - 1, end); | 2038 | cur_end = min(extent_map_end(em) - 1, end); |
| @@ -3072,6 +3075,8 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, | |||
| 3072 | #endif | 3075 | #endif |
| 3073 | 3076 | ||
| 3074 | eb = kmem_cache_zalloc(extent_buffer_cache, mask); | 3077 | eb = kmem_cache_zalloc(extent_buffer_cache, mask); |
| 3078 | if (eb == NULL) | ||
| 3079 | return NULL; | ||
| 3075 | eb->start = start; | 3080 | eb->start = start; |
| 3076 | eb->len = len; | 3081 | eb->len = len; |
| 3077 | spin_lock_init(&eb->lock); | 3082 | spin_lock_init(&eb->lock); |
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 4183c8178f01..7083cfafd061 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h | |||
| @@ -20,8 +20,12 @@ | |||
| 20 | #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) | 20 | #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) |
| 21 | #define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC) | 21 | #define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC) |
| 22 | 22 | ||
| 23 | /* flags for bio submission */ | 23 | /* |
| 24 | * flags for bio submission. The high bits indicate the compression | ||
| 25 | * type for this bio | ||
| 26 | */ | ||
| 24 | #define EXTENT_BIO_COMPRESSED 1 | 27 | #define EXTENT_BIO_COMPRESSED 1 |
| 28 | #define EXTENT_BIO_FLAG_SHIFT 16 | ||
| 25 | 29 | ||
| 26 | /* these are bit numbers for test/set bit */ | 30 | /* these are bit numbers for test/set bit */ |
| 27 | #define EXTENT_BUFFER_UPTODATE 0 | 31 | #define EXTENT_BUFFER_UPTODATE 0 |
| @@ -135,6 +139,17 @@ struct extent_buffer { | |||
| 135 | wait_queue_head_t lock_wq; | 139 | wait_queue_head_t lock_wq; |
| 136 | }; | 140 | }; |
| 137 | 141 | ||
| 142 | static inline void extent_set_compress_type(unsigned long *bio_flags, | ||
| 143 | int compress_type) | ||
| 144 | { | ||
| 145 | *bio_flags |= compress_type << EXTENT_BIO_FLAG_SHIFT; | ||
| 146 | } | ||
| 147 | |||
| 148 | static inline int extent_compress_type(unsigned long bio_flags) | ||
| 149 | { | ||
| 150 | return bio_flags >> EXTENT_BIO_FLAG_SHIFT; | ||
| 151 | } | ||
| 152 | |||
| 138 | struct extent_map_tree; | 153 | struct extent_map_tree; |
| 139 | 154 | ||
| 140 | static inline struct extent_state *extent_state_next(struct extent_state *state) | 155 | static inline struct extent_state *extent_state_next(struct extent_state *state) |
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 23cb8da3ff66..b0e1fce12530 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c | |||
| @@ -3,6 +3,7 @@ | |||
| 3 | #include <linux/module.h> | 3 | #include <linux/module.h> |
| 4 | #include <linux/spinlock.h> | 4 | #include <linux/spinlock.h> |
| 5 | #include <linux/hardirq.h> | 5 | #include <linux/hardirq.h> |
| 6 | #include "ctree.h" | ||
| 6 | #include "extent_map.h" | 7 | #include "extent_map.h" |
| 7 | 8 | ||
| 8 | 9 | ||
| @@ -54,6 +55,7 @@ struct extent_map *alloc_extent_map(gfp_t mask) | |||
| 54 | return em; | 55 | return em; |
| 55 | em->in_tree = 0; | 56 | em->in_tree = 0; |
| 56 | em->flags = 0; | 57 | em->flags = 0; |
| 58 | em->compress_type = BTRFS_COMPRESS_NONE; | ||
| 57 | atomic_set(&em->refs, 1); | 59 | atomic_set(&em->refs, 1); |
| 58 | return em; | 60 | return em; |
| 59 | } | 61 | } |
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index ab6d74b6e647..28b44dbd1e35 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h | |||
| @@ -26,7 +26,8 @@ struct extent_map { | |||
| 26 | unsigned long flags; | 26 | unsigned long flags; |
| 27 | struct block_device *bdev; | 27 | struct block_device *bdev; |
| 28 | atomic_t refs; | 28 | atomic_t refs; |
| 29 | int in_tree; | 29 | unsigned int in_tree:1; |
| 30 | unsigned int compress_type:4; | ||
| 30 | }; | 31 | }; |
| 31 | 32 | ||
| 32 | struct extent_map_tree { | 33 | struct extent_map_tree { |
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index a9e0a4eaf3d9..c800d58f3013 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
| @@ -225,6 +225,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
| 225 | 225 | ||
| 226 | split->bdev = em->bdev; | 226 | split->bdev = em->bdev; |
| 227 | split->flags = flags; | 227 | split->flags = flags; |
| 228 | split->compress_type = em->compress_type; | ||
| 228 | ret = add_extent_mapping(em_tree, split); | 229 | ret = add_extent_mapping(em_tree, split); |
| 229 | BUG_ON(ret); | 230 | BUG_ON(ret); |
| 230 | free_extent_map(split); | 231 | free_extent_map(split); |
| @@ -239,6 +240,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
| 239 | split->len = em->start + em->len - (start + len); | 240 | split->len = em->start + em->len - (start + len); |
| 240 | split->bdev = em->bdev; | 241 | split->bdev = em->bdev; |
| 241 | split->flags = flags; | 242 | split->flags = flags; |
| 243 | split->compress_type = em->compress_type; | ||
| 242 | 244 | ||
| 243 | if (compressed) { | 245 | if (compressed) { |
| 244 | split->block_len = em->block_len; | 246 | split->block_len = em->block_len; |
| @@ -891,6 +893,17 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | |||
| 891 | if (err) | 893 | if (err) |
| 892 | goto out; | 894 | goto out; |
| 893 | 895 | ||
| 896 | /* | ||
| 897 | * If BTRFS flips readonly due to some impossible error | ||
| 898 | * (fs_info->fs_state now has BTRFS_SUPER_FLAG_ERROR), | ||
| 899 | * although we have opened a file as writable, we have | ||
| 900 | * to stop this write operation to ensure FS consistency. | ||
| 901 | */ | ||
| 902 | if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { | ||
| 903 | err = -EROFS; | ||
| 904 | goto out; | ||
| 905 | } | ||
| 906 | |||
| 894 | file_update_time(file); | 907 | file_update_time(file); |
| 895 | BTRFS_I(inode)->sequence++; | 908 | BTRFS_I(inode)->sequence++; |
| 896 | 909 | ||
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 902afbf50811..160b55b3e132 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
| @@ -122,10 +122,10 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, | |||
| 122 | size_t cur_size = size; | 122 | size_t cur_size = size; |
| 123 | size_t datasize; | 123 | size_t datasize; |
| 124 | unsigned long offset; | 124 | unsigned long offset; |
| 125 | int use_compress = 0; | 125 | int compress_type = BTRFS_COMPRESS_NONE; |
| 126 | 126 | ||
| 127 | if (compressed_size && compressed_pages) { | 127 | if (compressed_size && compressed_pages) { |
| 128 | use_compress = 1; | 128 | compress_type = root->fs_info->compress_type; |
| 129 | cur_size = compressed_size; | 129 | cur_size = compressed_size; |
| 130 | } | 130 | } |
| 131 | 131 | ||
| @@ -159,7 +159,7 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, | |||
| 159 | btrfs_set_file_extent_ram_bytes(leaf, ei, size); | 159 | btrfs_set_file_extent_ram_bytes(leaf, ei, size); |
| 160 | ptr = btrfs_file_extent_inline_start(ei); | 160 | ptr = btrfs_file_extent_inline_start(ei); |
| 161 | 161 | ||
| 162 | if (use_compress) { | 162 | if (compress_type != BTRFS_COMPRESS_NONE) { |
| 163 | struct page *cpage; | 163 | struct page *cpage; |
| 164 | int i = 0; | 164 | int i = 0; |
| 165 | while (compressed_size > 0) { | 165 | while (compressed_size > 0) { |
| @@ -176,7 +176,7 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, | |||
| 176 | compressed_size -= cur_size; | 176 | compressed_size -= cur_size; |
| 177 | } | 177 | } |
| 178 | btrfs_set_file_extent_compression(leaf, ei, | 178 | btrfs_set_file_extent_compression(leaf, ei, |
| 179 | BTRFS_COMPRESS_ZLIB); | 179 | compress_type); |
| 180 | } else { | 180 | } else { |
| 181 | page = find_get_page(inode->i_mapping, | 181 | page = find_get_page(inode->i_mapping, |
| 182 | start >> PAGE_CACHE_SHIFT); | 182 | start >> PAGE_CACHE_SHIFT); |
| @@ -263,6 +263,7 @@ struct async_extent { | |||
| 263 | u64 compressed_size; | 263 | u64 compressed_size; |
| 264 | struct page **pages; | 264 | struct page **pages; |
| 265 | unsigned long nr_pages; | 265 | unsigned long nr_pages; |
| 266 | int compress_type; | ||
| 266 | struct list_head list; | 267 | struct list_head list; |
| 267 | }; | 268 | }; |
| 268 | 269 | ||
| @@ -280,7 +281,8 @@ static noinline int add_async_extent(struct async_cow *cow, | |||
| 280 | u64 start, u64 ram_size, | 281 | u64 start, u64 ram_size, |
| 281 | u64 compressed_size, | 282 | u64 compressed_size, |
| 282 | struct page **pages, | 283 | struct page **pages, |
| 283 | unsigned long nr_pages) | 284 | unsigned long nr_pages, |
| 285 | int compress_type) | ||
| 284 | { | 286 | { |
| 285 | struct async_extent *async_extent; | 287 | struct async_extent *async_extent; |
| 286 | 288 | ||
| @@ -290,6 +292,7 @@ static noinline int add_async_extent(struct async_cow *cow, | |||
| 290 | async_extent->compressed_size = compressed_size; | 292 | async_extent->compressed_size = compressed_size; |
| 291 | async_extent->pages = pages; | 293 | async_extent->pages = pages; |
| 292 | async_extent->nr_pages = nr_pages; | 294 | async_extent->nr_pages = nr_pages; |
| 295 | async_extent->compress_type = compress_type; | ||
| 293 | list_add_tail(&async_extent->list, &cow->extents); | 296 | list_add_tail(&async_extent->list, &cow->extents); |
| 294 | return 0; | 297 | return 0; |
| 295 | } | 298 | } |
| @@ -332,6 +335,7 @@ static noinline int compress_file_range(struct inode *inode, | |||
| 332 | unsigned long max_uncompressed = 128 * 1024; | 335 | unsigned long max_uncompressed = 128 * 1024; |
| 333 | int i; | 336 | int i; |
| 334 | int will_compress; | 337 | int will_compress; |
| 338 | int compress_type = root->fs_info->compress_type; | ||
| 335 | 339 | ||
| 336 | actual_end = min_t(u64, isize, end + 1); | 340 | actual_end = min_t(u64, isize, end + 1); |
| 337 | again: | 341 | again: |
| @@ -381,12 +385,16 @@ again: | |||
| 381 | WARN_ON(pages); | 385 | WARN_ON(pages); |
| 382 | pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS); | 386 | pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS); |
| 383 | 387 | ||
| 384 | ret = btrfs_zlib_compress_pages(inode->i_mapping, start, | 388 | if (BTRFS_I(inode)->force_compress) |
| 385 | total_compressed, pages, | 389 | compress_type = BTRFS_I(inode)->force_compress; |
| 386 | nr_pages, &nr_pages_ret, | 390 | |
| 387 | &total_in, | 391 | ret = btrfs_compress_pages(compress_type, |
| 388 | &total_compressed, | 392 | inode->i_mapping, start, |
| 389 | max_compressed); | 393 | total_compressed, pages, |
| 394 | nr_pages, &nr_pages_ret, | ||
| 395 | &total_in, | ||
| 396 | &total_compressed, | ||
| 397 | max_compressed); | ||
| 390 | 398 | ||
| 391 | if (!ret) { | 399 | if (!ret) { |
| 392 | unsigned long offset = total_compressed & | 400 | unsigned long offset = total_compressed & |
| @@ -493,7 +501,8 @@ again: | |||
| 493 | * and will submit them to the elevator. | 501 | * and will submit them to the elevator. |
| 494 | */ | 502 | */ |
| 495 | add_async_extent(async_cow, start, num_bytes, | 503 | add_async_extent(async_cow, start, num_bytes, |
| 496 | total_compressed, pages, nr_pages_ret); | 504 | total_compressed, pages, nr_pages_ret, |
| 505 | compress_type); | ||
| 497 | 506 | ||
| 498 | if (start + num_bytes < end) { | 507 | if (start + num_bytes < end) { |
| 499 | start += num_bytes; | 508 | start += num_bytes; |
| @@ -515,7 +524,8 @@ cleanup_and_bail_uncompressed: | |||
| 515 | __set_page_dirty_nobuffers(locked_page); | 524 | __set_page_dirty_nobuffers(locked_page); |
| 516 | /* unlocked later on in the async handlers */ | 525 | /* unlocked later on in the async handlers */ |
| 517 | } | 526 | } |
| 518 | add_async_extent(async_cow, start, end - start + 1, 0, NULL, 0); | 527 | add_async_extent(async_cow, start, end - start + 1, |
| 528 | 0, NULL, 0, BTRFS_COMPRESS_NONE); | ||
| 519 | *num_added += 1; | 529 | *num_added += 1; |
| 520 | } | 530 | } |
| 521 | 531 | ||
| @@ -640,6 +650,7 @@ retry: | |||
| 640 | em->block_start = ins.objectid; | 650 | em->block_start = ins.objectid; |
| 641 | em->block_len = ins.offset; | 651 | em->block_len = ins.offset; |
| 642 | em->bdev = root->fs_info->fs_devices->latest_bdev; | 652 | em->bdev = root->fs_info->fs_devices->latest_bdev; |
| 653 | em->compress_type = async_extent->compress_type; | ||
| 643 | set_bit(EXTENT_FLAG_PINNED, &em->flags); | 654 | set_bit(EXTENT_FLAG_PINNED, &em->flags); |
| 644 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); | 655 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); |
| 645 | 656 | ||
| @@ -656,11 +667,13 @@ retry: | |||
| 656 | async_extent->ram_size - 1, 0); | 667 | async_extent->ram_size - 1, 0); |
| 657 | } | 668 | } |
| 658 | 669 | ||
| 659 | ret = btrfs_add_ordered_extent(inode, async_extent->start, | 670 | ret = btrfs_add_ordered_extent_compress(inode, |
| 660 | ins.objectid, | 671 | async_extent->start, |
| 661 | async_extent->ram_size, | 672 | ins.objectid, |
| 662 | ins.offset, | 673 | async_extent->ram_size, |
| 663 | BTRFS_ORDERED_COMPRESSED); | 674 | ins.offset, |
| 675 | BTRFS_ORDERED_COMPRESSED, | ||
| 676 | async_extent->compress_type); | ||
| 664 | BUG_ON(ret); | 677 | BUG_ON(ret); |
| 665 | 678 | ||
| 666 | /* | 679 | /* |
| @@ -1670,7 +1683,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
| 1670 | struct btrfs_ordered_extent *ordered_extent = NULL; | 1683 | struct btrfs_ordered_extent *ordered_extent = NULL; |
| 1671 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 1684 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
| 1672 | struct extent_state *cached_state = NULL; | 1685 | struct extent_state *cached_state = NULL; |
| 1673 | int compressed = 0; | 1686 | int compress_type = 0; |
| 1674 | int ret; | 1687 | int ret; |
| 1675 | bool nolock = false; | 1688 | bool nolock = false; |
| 1676 | 1689 | ||
| @@ -1711,9 +1724,9 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
| 1711 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | 1724 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; |
| 1712 | 1725 | ||
| 1713 | if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) | 1726 | if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) |
| 1714 | compressed = 1; | 1727 | compress_type = ordered_extent->compress_type; |
| 1715 | if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) { | 1728 | if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) { |
| 1716 | BUG_ON(compressed); | 1729 | BUG_ON(compress_type); |
| 1717 | ret = btrfs_mark_extent_written(trans, inode, | 1730 | ret = btrfs_mark_extent_written(trans, inode, |
| 1718 | ordered_extent->file_offset, | 1731 | ordered_extent->file_offset, |
| 1719 | ordered_extent->file_offset + | 1732 | ordered_extent->file_offset + |
| @@ -1727,7 +1740,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
| 1727 | ordered_extent->disk_len, | 1740 | ordered_extent->disk_len, |
| 1728 | ordered_extent->len, | 1741 | ordered_extent->len, |
| 1729 | ordered_extent->len, | 1742 | ordered_extent->len, |
| 1730 | compressed, 0, 0, | 1743 | compress_type, 0, 0, |
| 1731 | BTRFS_FILE_EXTENT_REG); | 1744 | BTRFS_FILE_EXTENT_REG); |
| 1732 | unpin_extent_cache(&BTRFS_I(inode)->extent_tree, | 1745 | unpin_extent_cache(&BTRFS_I(inode)->extent_tree, |
| 1733 | ordered_extent->file_offset, | 1746 | ordered_extent->file_offset, |
| @@ -1829,6 +1842,8 @@ static int btrfs_io_failed_hook(struct bio *failed_bio, | |||
| 1829 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { | 1842 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { |
| 1830 | logical = em->block_start; | 1843 | logical = em->block_start; |
| 1831 | failrec->bio_flags = EXTENT_BIO_COMPRESSED; | 1844 | failrec->bio_flags = EXTENT_BIO_COMPRESSED; |
| 1845 | extent_set_compress_type(&failrec->bio_flags, | ||
| 1846 | em->compress_type); | ||
| 1832 | } | 1847 | } |
| 1833 | failrec->logical = logical; | 1848 | failrec->logical = logical; |
| 1834 | free_extent_map(em); | 1849 | free_extent_map(em); |
| @@ -3671,8 +3686,12 @@ static int btrfs_setattr_size(struct inode *inode, struct iattr *attr) | |||
| 3671 | static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) | 3686 | static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) |
| 3672 | { | 3687 | { |
| 3673 | struct inode *inode = dentry->d_inode; | 3688 | struct inode *inode = dentry->d_inode; |
| 3689 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 3674 | int err; | 3690 | int err; |
| 3675 | 3691 | ||
| 3692 | if (btrfs_root_readonly(root)) | ||
| 3693 | return -EROFS; | ||
| 3694 | |||
| 3676 | err = inode_change_ok(inode, attr); | 3695 | err = inode_change_ok(inode, attr); |
| 3677 | if (err) | 3696 | if (err) |
| 3678 | return err; | 3697 | return err; |
| @@ -4928,8 +4947,10 @@ static noinline int uncompress_inline(struct btrfs_path *path, | |||
| 4928 | size_t max_size; | 4947 | size_t max_size; |
| 4929 | unsigned long inline_size; | 4948 | unsigned long inline_size; |
| 4930 | unsigned long ptr; | 4949 | unsigned long ptr; |
| 4950 | int compress_type; | ||
| 4931 | 4951 | ||
| 4932 | WARN_ON(pg_offset != 0); | 4952 | WARN_ON(pg_offset != 0); |
| 4953 | compress_type = btrfs_file_extent_compression(leaf, item); | ||
| 4933 | max_size = btrfs_file_extent_ram_bytes(leaf, item); | 4954 | max_size = btrfs_file_extent_ram_bytes(leaf, item); |
| 4934 | inline_size = btrfs_file_extent_inline_item_len(leaf, | 4955 | inline_size = btrfs_file_extent_inline_item_len(leaf, |
| 4935 | btrfs_item_nr(leaf, path->slots[0])); | 4956 | btrfs_item_nr(leaf, path->slots[0])); |
| @@ -4939,8 +4960,8 @@ static noinline int uncompress_inline(struct btrfs_path *path, | |||
| 4939 | read_extent_buffer(leaf, tmp, ptr, inline_size); | 4960 | read_extent_buffer(leaf, tmp, ptr, inline_size); |
| 4940 | 4961 | ||
| 4941 | max_size = min_t(unsigned long, PAGE_CACHE_SIZE, max_size); | 4962 | max_size = min_t(unsigned long, PAGE_CACHE_SIZE, max_size); |
| 4942 | ret = btrfs_zlib_decompress(tmp, page, extent_offset, | 4963 | ret = btrfs_decompress(compress_type, tmp, page, |
| 4943 | inline_size, max_size); | 4964 | extent_offset, inline_size, max_size); |
| 4944 | if (ret) { | 4965 | if (ret) { |
| 4945 | char *kaddr = kmap_atomic(page, KM_USER0); | 4966 | char *kaddr = kmap_atomic(page, KM_USER0); |
| 4946 | unsigned long copy_size = min_t(u64, | 4967 | unsigned long copy_size = min_t(u64, |
| @@ -4982,7 +5003,7 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, | |||
| 4982 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | 5003 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; |
| 4983 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 5004 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
| 4984 | struct btrfs_trans_handle *trans = NULL; | 5005 | struct btrfs_trans_handle *trans = NULL; |
| 4985 | int compressed; | 5006 | int compress_type; |
| 4986 | 5007 | ||
| 4987 | again: | 5008 | again: |
| 4988 | read_lock(&em_tree->lock); | 5009 | read_lock(&em_tree->lock); |
| @@ -5041,7 +5062,7 @@ again: | |||
| 5041 | 5062 | ||
| 5042 | found_type = btrfs_file_extent_type(leaf, item); | 5063 | found_type = btrfs_file_extent_type(leaf, item); |
| 5043 | extent_start = found_key.offset; | 5064 | extent_start = found_key.offset; |
| 5044 | compressed = btrfs_file_extent_compression(leaf, item); | 5065 | compress_type = btrfs_file_extent_compression(leaf, item); |
| 5045 | if (found_type == BTRFS_FILE_EXTENT_REG || | 5066 | if (found_type == BTRFS_FILE_EXTENT_REG || |
| 5046 | found_type == BTRFS_FILE_EXTENT_PREALLOC) { | 5067 | found_type == BTRFS_FILE_EXTENT_PREALLOC) { |
| 5047 | extent_end = extent_start + | 5068 | extent_end = extent_start + |
| @@ -5087,8 +5108,9 @@ again: | |||
| 5087 | em->block_start = EXTENT_MAP_HOLE; | 5108 | em->block_start = EXTENT_MAP_HOLE; |
| 5088 | goto insert; | 5109 | goto insert; |
| 5089 | } | 5110 | } |
| 5090 | if (compressed) { | 5111 | if (compress_type != BTRFS_COMPRESS_NONE) { |
| 5091 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); | 5112 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); |
| 5113 | em->compress_type = compress_type; | ||
| 5092 | em->block_start = bytenr; | 5114 | em->block_start = bytenr; |
| 5093 | em->block_len = btrfs_file_extent_disk_num_bytes(leaf, | 5115 | em->block_len = btrfs_file_extent_disk_num_bytes(leaf, |
| 5094 | item); | 5116 | item); |
| @@ -5122,12 +5144,14 @@ again: | |||
| 5122 | em->len = (copy_size + root->sectorsize - 1) & | 5144 | em->len = (copy_size + root->sectorsize - 1) & |
| 5123 | ~((u64)root->sectorsize - 1); | 5145 | ~((u64)root->sectorsize - 1); |
| 5124 | em->orig_start = EXTENT_MAP_INLINE; | 5146 | em->orig_start = EXTENT_MAP_INLINE; |
| 5125 | if (compressed) | 5147 | if (compress_type) { |
| 5126 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); | 5148 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); |
| 5149 | em->compress_type = compress_type; | ||
| 5150 | } | ||
| 5127 | ptr = btrfs_file_extent_inline_start(item) + extent_offset; | 5151 | ptr = btrfs_file_extent_inline_start(item) + extent_offset; |
| 5128 | if (create == 0 && !PageUptodate(page)) { | 5152 | if (create == 0 && !PageUptodate(page)) { |
| 5129 | if (btrfs_file_extent_compression(leaf, item) == | 5153 | if (btrfs_file_extent_compression(leaf, item) != |
| 5130 | BTRFS_COMPRESS_ZLIB) { | 5154 | BTRFS_COMPRESS_NONE) { |
| 5131 | ret = uncompress_inline(path, inode, page, | 5155 | ret = uncompress_inline(path, inode, page, |
| 5132 | pg_offset, | 5156 | pg_offset, |
| 5133 | extent_offset, item); | 5157 | extent_offset, item); |
| @@ -6477,7 +6501,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) | |||
| 6477 | ei->ordered_data_close = 0; | 6501 | ei->ordered_data_close = 0; |
| 6478 | ei->orphan_meta_reserved = 0; | 6502 | ei->orphan_meta_reserved = 0; |
| 6479 | ei->dummy_inode = 0; | 6503 | ei->dummy_inode = 0; |
| 6480 | ei->force_compress = 0; | 6504 | ei->force_compress = BTRFS_COMPRESS_NONE; |
| 6481 | 6505 | ||
| 6482 | inode = &ei->vfs_inode; | 6506 | inode = &ei->vfs_inode; |
| 6483 | extent_map_tree_init(&ei->extent_tree, GFP_NOFS); | 6507 | extent_map_tree_init(&ei->extent_tree, GFP_NOFS); |
| @@ -7105,6 +7129,10 @@ static int btrfs_set_page_dirty(struct page *page) | |||
| 7105 | 7129 | ||
| 7106 | static int btrfs_permission(struct inode *inode, int mask, unsigned int flags) | 7130 | static int btrfs_permission(struct inode *inode, int mask, unsigned int flags) |
| 7107 | { | 7131 | { |
| 7132 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 7133 | |||
| 7134 | if (btrfs_root_readonly(root) && (mask & MAY_WRITE)) | ||
| 7135 | return -EROFS; | ||
| 7108 | if ((BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) && (mask & MAY_WRITE)) | 7136 | if ((BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) && (mask & MAY_WRITE)) |
| 7109 | return -EACCES; | 7137 | return -EACCES; |
| 7110 | return generic_permission(inode, mask, flags, btrfs_check_acl); | 7138 | return generic_permission(inode, mask, flags, btrfs_check_acl); |
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index f87552a1d7ea..a506a22b522a 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
| @@ -147,6 +147,9 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) | |||
| 147 | unsigned int flags, oldflags; | 147 | unsigned int flags, oldflags; |
| 148 | int ret; | 148 | int ret; |
| 149 | 149 | ||
| 150 | if (btrfs_root_readonly(root)) | ||
| 151 | return -EROFS; | ||
| 152 | |||
| 150 | if (copy_from_user(&flags, arg, sizeof(flags))) | 153 | if (copy_from_user(&flags, arg, sizeof(flags))) |
| 151 | return -EFAULT; | 154 | return -EFAULT; |
| 152 | 155 | ||
| @@ -360,7 +363,8 @@ fail: | |||
| 360 | } | 363 | } |
| 361 | 364 | ||
| 362 | static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, | 365 | static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, |
| 363 | char *name, int namelen, u64 *async_transid) | 366 | char *name, int namelen, u64 *async_transid, |
| 367 | bool readonly) | ||
| 364 | { | 368 | { |
| 365 | struct inode *inode; | 369 | struct inode *inode; |
| 366 | struct dentry *parent; | 370 | struct dentry *parent; |
| @@ -378,6 +382,7 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, | |||
| 378 | btrfs_init_block_rsv(&pending_snapshot->block_rsv); | 382 | btrfs_init_block_rsv(&pending_snapshot->block_rsv); |
| 379 | pending_snapshot->dentry = dentry; | 383 | pending_snapshot->dentry = dentry; |
| 380 | pending_snapshot->root = root; | 384 | pending_snapshot->root = root; |
| 385 | pending_snapshot->readonly = readonly; | ||
| 381 | 386 | ||
| 382 | trans = btrfs_start_transaction(root->fs_info->extent_root, 5); | 387 | trans = btrfs_start_transaction(root->fs_info->extent_root, 5); |
| 383 | if (IS_ERR(trans)) { | 388 | if (IS_ERR(trans)) { |
| @@ -509,7 +514,7 @@ static inline int btrfs_may_create(struct inode *dir, struct dentry *child) | |||
| 509 | static noinline int btrfs_mksubvol(struct path *parent, | 514 | static noinline int btrfs_mksubvol(struct path *parent, |
| 510 | char *name, int namelen, | 515 | char *name, int namelen, |
| 511 | struct btrfs_root *snap_src, | 516 | struct btrfs_root *snap_src, |
| 512 | u64 *async_transid) | 517 | u64 *async_transid, bool readonly) |
| 513 | { | 518 | { |
| 514 | struct inode *dir = parent->dentry->d_inode; | 519 | struct inode *dir = parent->dentry->d_inode; |
| 515 | struct dentry *dentry; | 520 | struct dentry *dentry; |
| @@ -541,7 +546,7 @@ static noinline int btrfs_mksubvol(struct path *parent, | |||
| 541 | 546 | ||
| 542 | if (snap_src) { | 547 | if (snap_src) { |
| 543 | error = create_snapshot(snap_src, dentry, | 548 | error = create_snapshot(snap_src, dentry, |
| 544 | name, namelen, async_transid); | 549 | name, namelen, async_transid, readonly); |
| 545 | } else { | 550 | } else { |
| 546 | error = create_subvol(BTRFS_I(dir)->root, dentry, | 551 | error = create_subvol(BTRFS_I(dir)->root, dentry, |
| 547 | name, namelen, async_transid); | 552 | name, namelen, async_transid); |
| @@ -638,9 +643,11 @@ static int btrfs_defrag_file(struct file *file, | |||
| 638 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 643 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
| 639 | struct btrfs_ordered_extent *ordered; | 644 | struct btrfs_ordered_extent *ordered; |
| 640 | struct page *page; | 645 | struct page *page; |
| 646 | struct btrfs_super_block *disk_super; | ||
| 641 | unsigned long last_index; | 647 | unsigned long last_index; |
| 642 | unsigned long ra_pages = root->fs_info->bdi.ra_pages; | 648 | unsigned long ra_pages = root->fs_info->bdi.ra_pages; |
| 643 | unsigned long total_read = 0; | 649 | unsigned long total_read = 0; |
| 650 | u64 features; | ||
| 644 | u64 page_start; | 651 | u64 page_start; |
| 645 | u64 page_end; | 652 | u64 page_end; |
| 646 | u64 last_len = 0; | 653 | u64 last_len = 0; |
| @@ -648,6 +655,14 @@ static int btrfs_defrag_file(struct file *file, | |||
| 648 | u64 defrag_end = 0; | 655 | u64 defrag_end = 0; |
| 649 | unsigned long i; | 656 | unsigned long i; |
| 650 | int ret; | 657 | int ret; |
| 658 | int compress_type = BTRFS_COMPRESS_ZLIB; | ||
| 659 | |||
| 660 | if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) { | ||
| 661 | if (range->compress_type > BTRFS_COMPRESS_TYPES) | ||
| 662 | return -EINVAL; | ||
| 663 | if (range->compress_type) | ||
| 664 | compress_type = range->compress_type; | ||
| 665 | } | ||
| 651 | 666 | ||
| 652 | if (inode->i_size == 0) | 667 | if (inode->i_size == 0) |
| 653 | return 0; | 668 | return 0; |
| @@ -683,7 +698,7 @@ static int btrfs_defrag_file(struct file *file, | |||
| 683 | total_read++; | 698 | total_read++; |
| 684 | mutex_lock(&inode->i_mutex); | 699 | mutex_lock(&inode->i_mutex); |
| 685 | if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) | 700 | if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) |
| 686 | BTRFS_I(inode)->force_compress = 1; | 701 | BTRFS_I(inode)->force_compress = compress_type; |
| 687 | 702 | ||
| 688 | ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); | 703 | ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); |
| 689 | if (ret) | 704 | if (ret) |
| @@ -781,10 +796,17 @@ loop_unlock: | |||
| 781 | atomic_dec(&root->fs_info->async_submit_draining); | 796 | atomic_dec(&root->fs_info->async_submit_draining); |
| 782 | 797 | ||
| 783 | mutex_lock(&inode->i_mutex); | 798 | mutex_lock(&inode->i_mutex); |
| 784 | BTRFS_I(inode)->force_compress = 0; | 799 | BTRFS_I(inode)->force_compress = BTRFS_COMPRESS_NONE; |
| 785 | mutex_unlock(&inode->i_mutex); | 800 | mutex_unlock(&inode->i_mutex); |
| 786 | } | 801 | } |
| 787 | 802 | ||
| 803 | disk_super = &root->fs_info->super_copy; | ||
| 804 | features = btrfs_super_incompat_flags(disk_super); | ||
| 805 | if (range->compress_type == BTRFS_COMPRESS_LZO) { | ||
| 806 | features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; | ||
| 807 | btrfs_set_super_incompat_flags(disk_super, features); | ||
| 808 | } | ||
| 809 | |||
| 788 | return 0; | 810 | return 0; |
| 789 | 811 | ||
| 790 | err_reservations: | 812 | err_reservations: |
| @@ -901,7 +923,8 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file, | |||
| 901 | char *name, | 923 | char *name, |
| 902 | unsigned long fd, | 924 | unsigned long fd, |
| 903 | int subvol, | 925 | int subvol, |
| 904 | u64 *transid) | 926 | u64 *transid, |
| 927 | bool readonly) | ||
| 905 | { | 928 | { |
| 906 | struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; | 929 | struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; |
| 907 | struct file *src_file; | 930 | struct file *src_file; |
| @@ -919,7 +942,7 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file, | |||
| 919 | 942 | ||
| 920 | if (subvol) { | 943 | if (subvol) { |
| 921 | ret = btrfs_mksubvol(&file->f_path, name, namelen, | 944 | ret = btrfs_mksubvol(&file->f_path, name, namelen, |
| 922 | NULL, transid); | 945 | NULL, transid, readonly); |
| 923 | } else { | 946 | } else { |
| 924 | struct inode *src_inode; | 947 | struct inode *src_inode; |
| 925 | src_file = fget(fd); | 948 | src_file = fget(fd); |
| @@ -938,7 +961,7 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file, | |||
| 938 | } | 961 | } |
| 939 | ret = btrfs_mksubvol(&file->f_path, name, namelen, | 962 | ret = btrfs_mksubvol(&file->f_path, name, namelen, |
| 940 | BTRFS_I(src_inode)->root, | 963 | BTRFS_I(src_inode)->root, |
| 941 | transid); | 964 | transid, readonly); |
| 942 | fput(src_file); | 965 | fput(src_file); |
| 943 | } | 966 | } |
| 944 | out: | 967 | out: |
| @@ -946,58 +969,139 @@ out: | |||
| 946 | } | 969 | } |
| 947 | 970 | ||
| 948 | static noinline int btrfs_ioctl_snap_create(struct file *file, | 971 | static noinline int btrfs_ioctl_snap_create(struct file *file, |
| 949 | void __user *arg, int subvol, | 972 | void __user *arg, int subvol) |
| 950 | int v2) | ||
| 951 | { | 973 | { |
| 952 | struct btrfs_ioctl_vol_args *vol_args = NULL; | 974 | struct btrfs_ioctl_vol_args *vol_args; |
| 953 | struct btrfs_ioctl_vol_args_v2 *vol_args_v2 = NULL; | ||
| 954 | char *name; | ||
| 955 | u64 fd; | ||
| 956 | int ret; | 975 | int ret; |
| 957 | 976 | ||
| 958 | if (v2) { | 977 | vol_args = memdup_user(arg, sizeof(*vol_args)); |
| 959 | u64 transid = 0; | 978 | if (IS_ERR(vol_args)) |
| 960 | u64 *ptr = NULL; | 979 | return PTR_ERR(vol_args); |
| 980 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; | ||
| 961 | 981 | ||
| 962 | vol_args_v2 = memdup_user(arg, sizeof(*vol_args_v2)); | 982 | ret = btrfs_ioctl_snap_create_transid(file, vol_args->name, |
| 963 | if (IS_ERR(vol_args_v2)) | 983 | vol_args->fd, subvol, |
| 964 | return PTR_ERR(vol_args_v2); | 984 | NULL, false); |
| 965 | 985 | ||
| 966 | if (vol_args_v2->flags & ~BTRFS_SUBVOL_CREATE_ASYNC) { | 986 | kfree(vol_args); |
| 967 | ret = -EINVAL; | 987 | return ret; |
| 968 | goto out; | 988 | } |
| 969 | } | ||
| 970 | |||
| 971 | name = vol_args_v2->name; | ||
| 972 | fd = vol_args_v2->fd; | ||
| 973 | vol_args_v2->name[BTRFS_SUBVOL_NAME_MAX] = '\0'; | ||
| 974 | 989 | ||
| 975 | if (vol_args_v2->flags & BTRFS_SUBVOL_CREATE_ASYNC) | 990 | static noinline int btrfs_ioctl_snap_create_v2(struct file *file, |
| 976 | ptr = &transid; | 991 | void __user *arg, int subvol) |
| 992 | { | ||
| 993 | struct btrfs_ioctl_vol_args_v2 *vol_args; | ||
| 994 | int ret; | ||
| 995 | u64 transid = 0; | ||
| 996 | u64 *ptr = NULL; | ||
| 997 | bool readonly = false; | ||
| 977 | 998 | ||
| 978 | ret = btrfs_ioctl_snap_create_transid(file, name, fd, | 999 | vol_args = memdup_user(arg, sizeof(*vol_args)); |
| 979 | subvol, ptr); | 1000 | if (IS_ERR(vol_args)) |
| 1001 | return PTR_ERR(vol_args); | ||
| 1002 | vol_args->name[BTRFS_SUBVOL_NAME_MAX] = '\0'; | ||
| 980 | 1003 | ||
| 981 | if (ret == 0 && ptr && | 1004 | if (vol_args->flags & |
| 982 | copy_to_user(arg + | 1005 | ~(BTRFS_SUBVOL_CREATE_ASYNC | BTRFS_SUBVOL_RDONLY)) { |
| 983 | offsetof(struct btrfs_ioctl_vol_args_v2, | 1006 | ret = -EOPNOTSUPP; |
| 984 | transid), ptr, sizeof(*ptr))) | 1007 | goto out; |
| 985 | ret = -EFAULT; | ||
| 986 | } else { | ||
| 987 | vol_args = memdup_user(arg, sizeof(*vol_args)); | ||
| 988 | if (IS_ERR(vol_args)) | ||
| 989 | return PTR_ERR(vol_args); | ||
| 990 | name = vol_args->name; | ||
| 991 | fd = vol_args->fd; | ||
| 992 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; | ||
| 993 | |||
| 994 | ret = btrfs_ioctl_snap_create_transid(file, name, fd, | ||
| 995 | subvol, NULL); | ||
| 996 | } | 1008 | } |
| 1009 | |||
| 1010 | if (vol_args->flags & BTRFS_SUBVOL_CREATE_ASYNC) | ||
| 1011 | ptr = &transid; | ||
| 1012 | if (vol_args->flags & BTRFS_SUBVOL_RDONLY) | ||
| 1013 | readonly = true; | ||
| 1014 | |||
| 1015 | ret = btrfs_ioctl_snap_create_transid(file, vol_args->name, | ||
| 1016 | vol_args->fd, subvol, | ||
| 1017 | ptr, readonly); | ||
| 1018 | |||
| 1019 | if (ret == 0 && ptr && | ||
| 1020 | copy_to_user(arg + | ||
| 1021 | offsetof(struct btrfs_ioctl_vol_args_v2, | ||
| 1022 | transid), ptr, sizeof(*ptr))) | ||
| 1023 | ret = -EFAULT; | ||
| 997 | out: | 1024 | out: |
| 998 | kfree(vol_args); | 1025 | kfree(vol_args); |
| 999 | kfree(vol_args_v2); | 1026 | return ret; |
| 1027 | } | ||
| 1000 | 1028 | ||
| 1029 | static noinline int btrfs_ioctl_subvol_getflags(struct file *file, | ||
| 1030 | void __user *arg) | ||
| 1031 | { | ||
| 1032 | struct inode *inode = fdentry(file)->d_inode; | ||
| 1033 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 1034 | int ret = 0; | ||
| 1035 | u64 flags = 0; | ||
| 1036 | |||
| 1037 | if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) | ||
| 1038 | return -EINVAL; | ||
| 1039 | |||
| 1040 | down_read(&root->fs_info->subvol_sem); | ||
| 1041 | if (btrfs_root_readonly(root)) | ||
| 1042 | flags |= BTRFS_SUBVOL_RDONLY; | ||
| 1043 | up_read(&root->fs_info->subvol_sem); | ||
| 1044 | |||
| 1045 | if (copy_to_user(arg, &flags, sizeof(flags))) | ||
| 1046 | ret = -EFAULT; | ||
| 1047 | |||
| 1048 | return ret; | ||
| 1049 | } | ||
| 1050 | |||
| 1051 | static noinline int btrfs_ioctl_subvol_setflags(struct file *file, | ||
| 1052 | void __user *arg) | ||
| 1053 | { | ||
| 1054 | struct inode *inode = fdentry(file)->d_inode; | ||
| 1055 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 1056 | struct btrfs_trans_handle *trans; | ||
| 1057 | u64 root_flags; | ||
| 1058 | u64 flags; | ||
| 1059 | int ret = 0; | ||
| 1060 | |||
| 1061 | if (root->fs_info->sb->s_flags & MS_RDONLY) | ||
| 1062 | return -EROFS; | ||
| 1063 | |||
| 1064 | if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) | ||
| 1065 | return -EINVAL; | ||
| 1066 | |||
| 1067 | if (copy_from_user(&flags, arg, sizeof(flags))) | ||
| 1068 | return -EFAULT; | ||
| 1069 | |||
| 1070 | if (flags & ~BTRFS_SUBVOL_CREATE_ASYNC) | ||
| 1071 | return -EINVAL; | ||
| 1072 | |||
| 1073 | if (flags & ~BTRFS_SUBVOL_RDONLY) | ||
| 1074 | return -EOPNOTSUPP; | ||
| 1075 | |||
| 1076 | down_write(&root->fs_info->subvol_sem); | ||
| 1077 | |||
| 1078 | /* nothing to do */ | ||
| 1079 | if (!!(flags & BTRFS_SUBVOL_RDONLY) == btrfs_root_readonly(root)) | ||
| 1080 | goto out; | ||
| 1081 | |||
| 1082 | root_flags = btrfs_root_flags(&root->root_item); | ||
| 1083 | if (flags & BTRFS_SUBVOL_RDONLY) | ||
| 1084 | btrfs_set_root_flags(&root->root_item, | ||
| 1085 | root_flags | BTRFS_ROOT_SUBVOL_RDONLY); | ||
| 1086 | else | ||
| 1087 | btrfs_set_root_flags(&root->root_item, | ||
| 1088 | root_flags & ~BTRFS_ROOT_SUBVOL_RDONLY); | ||
| 1089 | |||
| 1090 | trans = btrfs_start_transaction(root, 1); | ||
| 1091 | if (IS_ERR(trans)) { | ||
| 1092 | ret = PTR_ERR(trans); | ||
| 1093 | goto out_reset; | ||
| 1094 | } | ||
| 1095 | |||
| 1096 | ret = btrfs_update_root(trans, root, | ||
| 1097 | &root->root_key, &root->root_item); | ||
| 1098 | |||
| 1099 | btrfs_commit_transaction(trans, root); | ||
| 1100 | out_reset: | ||
| 1101 | if (ret) | ||
| 1102 | btrfs_set_root_flags(&root->root_item, root_flags); | ||
| 1103 | out: | ||
| 1104 | up_write(&root->fs_info->subvol_sem); | ||
| 1001 | return ret; | 1105 | return ret; |
| 1002 | } | 1106 | } |
| 1003 | 1107 | ||
| @@ -1509,6 +1613,9 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp) | |||
| 1509 | struct btrfs_ioctl_defrag_range_args *range; | 1613 | struct btrfs_ioctl_defrag_range_args *range; |
| 1510 | int ret; | 1614 | int ret; |
| 1511 | 1615 | ||
| 1616 | if (btrfs_root_readonly(root)) | ||
| 1617 | return -EROFS; | ||
| 1618 | |||
| 1512 | ret = mnt_want_write(file->f_path.mnt); | 1619 | ret = mnt_want_write(file->f_path.mnt); |
| 1513 | if (ret) | 1620 | if (ret) |
| 1514 | return ret; | 1621 | return ret; |
| @@ -1637,6 +1744,9 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
| 1637 | if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND)) | 1744 | if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND)) |
| 1638 | return -EINVAL; | 1745 | return -EINVAL; |
| 1639 | 1746 | ||
| 1747 | if (btrfs_root_readonly(root)) | ||
| 1748 | return -EROFS; | ||
| 1749 | |||
| 1640 | ret = mnt_want_write(file->f_path.mnt); | 1750 | ret = mnt_want_write(file->f_path.mnt); |
| 1641 | if (ret) | 1751 | if (ret) |
| 1642 | return ret; | 1752 | return ret; |
| @@ -1958,6 +2068,10 @@ static long btrfs_ioctl_trans_start(struct file *file) | |||
| 1958 | if (file->private_data) | 2068 | if (file->private_data) |
| 1959 | goto out; | 2069 | goto out; |
| 1960 | 2070 | ||
| 2071 | ret = -EROFS; | ||
| 2072 | if (btrfs_root_readonly(root)) | ||
| 2073 | goto out; | ||
| 2074 | |||
| 1961 | ret = mnt_want_write(file->f_path.mnt); | 2075 | ret = mnt_want_write(file->f_path.mnt); |
| 1962 | if (ret) | 2076 | if (ret) |
| 1963 | goto out; | 2077 | goto out; |
| @@ -2257,13 +2371,17 @@ long btrfs_ioctl(struct file *file, unsigned int | |||
| 2257 | case FS_IOC_GETVERSION: | 2371 | case FS_IOC_GETVERSION: |
| 2258 | return btrfs_ioctl_getversion(file, argp); | 2372 | return btrfs_ioctl_getversion(file, argp); |
| 2259 | case BTRFS_IOC_SNAP_CREATE: | 2373 | case BTRFS_IOC_SNAP_CREATE: |
| 2260 | return btrfs_ioctl_snap_create(file, argp, 0, 0); | 2374 | return btrfs_ioctl_snap_create(file, argp, 0); |
| 2261 | case BTRFS_IOC_SNAP_CREATE_V2: | 2375 | case BTRFS_IOC_SNAP_CREATE_V2: |
| 2262 | return btrfs_ioctl_snap_create(file, argp, 0, 1); | 2376 | return btrfs_ioctl_snap_create_v2(file, argp, 0); |
| 2263 | case BTRFS_IOC_SUBVOL_CREATE: | 2377 | case BTRFS_IOC_SUBVOL_CREATE: |
| 2264 | return btrfs_ioctl_snap_create(file, argp, 1, 0); | 2378 | return btrfs_ioctl_snap_create(file, argp, 1); |
| 2265 | case BTRFS_IOC_SNAP_DESTROY: | 2379 | case BTRFS_IOC_SNAP_DESTROY: |
| 2266 | return btrfs_ioctl_snap_destroy(file, argp); | 2380 | return btrfs_ioctl_snap_destroy(file, argp); |
| 2381 | case BTRFS_IOC_SUBVOL_GETFLAGS: | ||
| 2382 | return btrfs_ioctl_subvol_getflags(file, argp); | ||
| 2383 | case BTRFS_IOC_SUBVOL_SETFLAGS: | ||
| 2384 | return btrfs_ioctl_subvol_setflags(file, argp); | ||
| 2267 | case BTRFS_IOC_DEFAULT_SUBVOL: | 2385 | case BTRFS_IOC_DEFAULT_SUBVOL: |
| 2268 | return btrfs_ioctl_default_subvol(file, argp); | 2386 | return btrfs_ioctl_default_subvol(file, argp); |
| 2269 | case BTRFS_IOC_DEFRAG: | 2387 | case BTRFS_IOC_DEFRAG: |
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index c344d12c646b..8fb382167b13 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h | |||
| @@ -31,6 +31,7 @@ struct btrfs_ioctl_vol_args { | |||
| 31 | }; | 31 | }; |
| 32 | 32 | ||
| 33 | #define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0) | 33 | #define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0) |
| 34 | #define BTRFS_SUBVOL_RDONLY (1ULL << 1) | ||
| 34 | 35 | ||
| 35 | #define BTRFS_SUBVOL_NAME_MAX 4039 | 36 | #define BTRFS_SUBVOL_NAME_MAX 4039 |
| 36 | struct btrfs_ioctl_vol_args_v2 { | 37 | struct btrfs_ioctl_vol_args_v2 { |
| @@ -133,8 +134,15 @@ struct btrfs_ioctl_defrag_range_args { | |||
| 133 | */ | 134 | */ |
| 134 | __u32 extent_thresh; | 135 | __u32 extent_thresh; |
| 135 | 136 | ||
| 137 | /* | ||
| 138 | * which compression method to use if turning on compression | ||
| 139 | * for this defrag operation. If unspecified, zlib will | ||
| 140 | * be used | ||
| 141 | */ | ||
| 142 | __u32 compress_type; | ||
| 143 | |||
| 136 | /* spare for later */ | 144 | /* spare for later */ |
| 137 | __u32 unused[5]; | 145 | __u32 unused[4]; |
| 138 | }; | 146 | }; |
| 139 | 147 | ||
| 140 | struct btrfs_ioctl_space_info { | 148 | struct btrfs_ioctl_space_info { |
| @@ -193,4 +201,6 @@ struct btrfs_ioctl_space_args { | |||
| 193 | #define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64) | 201 | #define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64) |
| 194 | #define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \ | 202 | #define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \ |
| 195 | struct btrfs_ioctl_vol_args_v2) | 203 | struct btrfs_ioctl_vol_args_v2) |
| 204 | #define BTRFS_IOC_SUBVOL_GETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 25, __u64) | ||
| 205 | #define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64) | ||
| 196 | #endif | 206 | #endif |
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c new file mode 100644 index 000000000000..cc9b450399df --- /dev/null +++ b/fs/btrfs/lzo.c | |||
| @@ -0,0 +1,420 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2008 Oracle. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or | ||
| 5 | * modify it under the terms of the GNU General Public | ||
| 6 | * License v2 as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 11 | * General Public License for more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public | ||
| 14 | * License along with this program; if not, write to the | ||
| 15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 16 | * Boston, MA 02111-1307, USA. | ||
| 17 | */ | ||
| 18 | |||
| 19 | #include <linux/kernel.h> | ||
| 20 | #include <linux/slab.h> | ||
| 21 | #include <linux/vmalloc.h> | ||
| 22 | #include <linux/init.h> | ||
| 23 | #include <linux/err.h> | ||
| 24 | #include <linux/sched.h> | ||
| 25 | #include <linux/pagemap.h> | ||
| 26 | #include <linux/bio.h> | ||
| 27 | #include <linux/lzo.h> | ||
| 28 | #include "compression.h" | ||
| 29 | |||
| 30 | #define LZO_LEN 4 | ||
| 31 | |||
| 32 | struct workspace { | ||
| 33 | void *mem; | ||
| 34 | void *buf; /* where decompressed data goes */ | ||
| 35 | void *cbuf; /* where compressed data goes */ | ||
| 36 | struct list_head list; | ||
| 37 | }; | ||
| 38 | |||
| 39 | static void lzo_free_workspace(struct list_head *ws) | ||
| 40 | { | ||
| 41 | struct workspace *workspace = list_entry(ws, struct workspace, list); | ||
| 42 | |||
| 43 | vfree(workspace->buf); | ||
| 44 | vfree(workspace->cbuf); | ||
| 45 | vfree(workspace->mem); | ||
| 46 | kfree(workspace); | ||
| 47 | } | ||
| 48 | |||
| 49 | static struct list_head *lzo_alloc_workspace(void) | ||
| 50 | { | ||
| 51 | struct workspace *workspace; | ||
| 52 | |||
| 53 | workspace = kzalloc(sizeof(*workspace), GFP_NOFS); | ||
| 54 | if (!workspace) | ||
| 55 | return ERR_PTR(-ENOMEM); | ||
| 56 | |||
| 57 | workspace->mem = vmalloc(LZO1X_MEM_COMPRESS); | ||
| 58 | workspace->buf = vmalloc(lzo1x_worst_compress(PAGE_CACHE_SIZE)); | ||
| 59 | workspace->cbuf = vmalloc(lzo1x_worst_compress(PAGE_CACHE_SIZE)); | ||
| 60 | if (!workspace->mem || !workspace->buf || !workspace->cbuf) | ||
| 61 | goto fail; | ||
| 62 | |||
| 63 | INIT_LIST_HEAD(&workspace->list); | ||
| 64 | |||
| 65 | return &workspace->list; | ||
| 66 | fail: | ||
| 67 | lzo_free_workspace(&workspace->list); | ||
| 68 | return ERR_PTR(-ENOMEM); | ||
| 69 | } | ||
| 70 | |||
| 71 | static inline void write_compress_length(char *buf, size_t len) | ||
| 72 | { | ||
| 73 | __le32 dlen; | ||
| 74 | |||
| 75 | dlen = cpu_to_le32(len); | ||
| 76 | memcpy(buf, &dlen, LZO_LEN); | ||
| 77 | } | ||
| 78 | |||
| 79 | static inline size_t read_compress_length(char *buf) | ||
| 80 | { | ||
| 81 | __le32 dlen; | ||
| 82 | |||
| 83 | memcpy(&dlen, buf, LZO_LEN); | ||
| 84 | return le32_to_cpu(dlen); | ||
| 85 | } | ||
| 86 | |||
| 87 | static int lzo_compress_pages(struct list_head *ws, | ||
| 88 | struct address_space *mapping, | ||
| 89 | u64 start, unsigned long len, | ||
| 90 | struct page **pages, | ||
| 91 | unsigned long nr_dest_pages, | ||
| 92 | unsigned long *out_pages, | ||
| 93 | unsigned long *total_in, | ||
| 94 | unsigned long *total_out, | ||
| 95 | unsigned long max_out) | ||
| 96 | { | ||
| 97 | struct workspace *workspace = list_entry(ws, struct workspace, list); | ||
| 98 | int ret = 0; | ||
| 99 | char *data_in; | ||
| 100 | char *cpage_out; | ||
| 101 | int nr_pages = 0; | ||
| 102 | struct page *in_page = NULL; | ||
| 103 | struct page *out_page = NULL; | ||
| 104 | unsigned long bytes_left; | ||
| 105 | |||
| 106 | size_t in_len; | ||
| 107 | size_t out_len; | ||
| 108 | char *buf; | ||
| 109 | unsigned long tot_in = 0; | ||
| 110 | unsigned long tot_out = 0; | ||
| 111 | unsigned long pg_bytes_left; | ||
| 112 | unsigned long out_offset; | ||
| 113 | unsigned long bytes; | ||
| 114 | |||
| 115 | *out_pages = 0; | ||
| 116 | *total_out = 0; | ||
| 117 | *total_in = 0; | ||
| 118 | |||
| 119 | in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT); | ||
| 120 | data_in = kmap(in_page); | ||
| 121 | |||
| 122 | /* | ||
| 123 | * store the size of all chunks of compressed data in | ||
| 124 | * the first 4 bytes | ||
| 125 | */ | ||
| 126 | out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); | ||
| 127 | if (out_page == NULL) { | ||
| 128 | ret = -ENOMEM; | ||
| 129 | goto out; | ||
| 130 | } | ||
| 131 | cpage_out = kmap(out_page); | ||
| 132 | out_offset = LZO_LEN; | ||
| 133 | tot_out = LZO_LEN; | ||
| 134 | pages[0] = out_page; | ||
| 135 | nr_pages = 1; | ||
| 136 | pg_bytes_left = PAGE_CACHE_SIZE - LZO_LEN; | ||
| 137 | |||
| 138 | /* compress at most one page of data each time */ | ||
| 139 | in_len = min(len, PAGE_CACHE_SIZE); | ||
| 140 | while (tot_in < len) { | ||
| 141 | ret = lzo1x_1_compress(data_in, in_len, workspace->cbuf, | ||
| 142 | &out_len, workspace->mem); | ||
| 143 | if (ret != LZO_E_OK) { | ||
| 144 | printk(KERN_DEBUG "btrfs deflate in loop returned %d\n", | ||
| 145 | ret); | ||
| 146 | ret = -1; | ||
| 147 | goto out; | ||
| 148 | } | ||
| 149 | |||
| 150 | /* store the size of this chunk of compressed data */ | ||
| 151 | write_compress_length(cpage_out + out_offset, out_len); | ||
| 152 | tot_out += LZO_LEN; | ||
| 153 | out_offset += LZO_LEN; | ||
| 154 | pg_bytes_left -= LZO_LEN; | ||
| 155 | |||
| 156 | tot_in += in_len; | ||
| 157 | tot_out += out_len; | ||
| 158 | |||
| 159 | /* copy bytes from the working buffer into the pages */ | ||
| 160 | buf = workspace->cbuf; | ||
| 161 | while (out_len) { | ||
| 162 | bytes = min_t(unsigned long, pg_bytes_left, out_len); | ||
| 163 | |||
| 164 | memcpy(cpage_out + out_offset, buf, bytes); | ||
| 165 | |||
| 166 | out_len -= bytes; | ||
| 167 | pg_bytes_left -= bytes; | ||
| 168 | buf += bytes; | ||
| 169 | out_offset += bytes; | ||
| 170 | |||
| 171 | /* | ||
| 172 | * we need another page for writing out. | ||
| 173 | * | ||
| 174 | * Note if there's less than 4 bytes left, we just | ||
| 175 | * skip to a new page. | ||
| 176 | */ | ||
| 177 | if ((out_len == 0 && pg_bytes_left < LZO_LEN) || | ||
| 178 | pg_bytes_left == 0) { | ||
| 179 | if (pg_bytes_left) { | ||
| 180 | memset(cpage_out + out_offset, 0, | ||
| 181 | pg_bytes_left); | ||
| 182 | tot_out += pg_bytes_left; | ||
| 183 | } | ||
| 184 | |||
| 185 | /* we're done, don't allocate new page */ | ||
| 186 | if (out_len == 0 && tot_in >= len) | ||
| 187 | break; | ||
| 188 | |||
| 189 | kunmap(out_page); | ||
| 190 | if (nr_pages == nr_dest_pages) { | ||
| 191 | out_page = NULL; | ||
| 192 | ret = -1; | ||
| 193 | goto out; | ||
| 194 | } | ||
| 195 | |||
| 196 | out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); | ||
| 197 | if (out_page == NULL) { | ||
| 198 | ret = -ENOMEM; | ||
| 199 | goto out; | ||
| 200 | } | ||
| 201 | cpage_out = kmap(out_page); | ||
| 202 | pages[nr_pages++] = out_page; | ||
| 203 | |||
| 204 | pg_bytes_left = PAGE_CACHE_SIZE; | ||
| 205 | out_offset = 0; | ||
| 206 | } | ||
| 207 | } | ||
| 208 | |||
| 209 | /* we're making it bigger, give up */ | ||
| 210 | if (tot_in > 8192 && tot_in < tot_out) | ||
| 211 | goto out; | ||
| 212 | |||
| 213 | /* we're all done */ | ||
| 214 | if (tot_in >= len) | ||
| 215 | break; | ||
| 216 | |||
| 217 | if (tot_out > max_out) | ||
| 218 | break; | ||
| 219 | |||
| 220 | bytes_left = len - tot_in; | ||
| 221 | kunmap(in_page); | ||
| 222 | page_cache_release(in_page); | ||
| 223 | |||
| 224 | start += PAGE_CACHE_SIZE; | ||
| 225 | in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT); | ||
| 226 | data_in = kmap(in_page); | ||
| 227 | in_len = min(bytes_left, PAGE_CACHE_SIZE); | ||
| 228 | } | ||
| 229 | |||
| 230 | if (tot_out > tot_in) | ||
| 231 | goto out; | ||
| 232 | |||
| 233 | /* store the size of all chunks of compressed data */ | ||
| 234 | cpage_out = kmap(pages[0]); | ||
| 235 | write_compress_length(cpage_out, tot_out); | ||
| 236 | |||
| 237 | kunmap(pages[0]); | ||
| 238 | |||
| 239 | ret = 0; | ||
| 240 | *total_out = tot_out; | ||
| 241 | *total_in = tot_in; | ||
| 242 | out: | ||
| 243 | *out_pages = nr_pages; | ||
| 244 | if (out_page) | ||
| 245 | kunmap(out_page); | ||
| 246 | |||
| 247 | if (in_page) { | ||
| 248 | kunmap(in_page); | ||
| 249 | page_cache_release(in_page); | ||
| 250 | } | ||
| 251 | |||
| 252 | return ret; | ||
| 253 | } | ||
| 254 | |||
| 255 | static int lzo_decompress_biovec(struct list_head *ws, | ||
| 256 | struct page **pages_in, | ||
| 257 | u64 disk_start, | ||
| 258 | struct bio_vec *bvec, | ||
| 259 | int vcnt, | ||
| 260 | size_t srclen) | ||
| 261 | { | ||
| 262 | struct workspace *workspace = list_entry(ws, struct workspace, list); | ||
| 263 | int ret = 0, ret2; | ||
| 264 | char *data_in; | ||
| 265 | unsigned long page_in_index = 0; | ||
| 266 | unsigned long page_out_index = 0; | ||
| 267 | unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) / | ||
| 268 | PAGE_CACHE_SIZE; | ||
| 269 | unsigned long buf_start; | ||
| 270 | unsigned long buf_offset = 0; | ||
| 271 | unsigned long bytes; | ||
| 272 | unsigned long working_bytes; | ||
| 273 | unsigned long pg_offset; | ||
| 274 | |||
| 275 | size_t in_len; | ||
| 276 | size_t out_len; | ||
| 277 | unsigned long in_offset; | ||
| 278 | unsigned long in_page_bytes_left; | ||
| 279 | unsigned long tot_in; | ||
| 280 | unsigned long tot_out; | ||
| 281 | unsigned long tot_len; | ||
| 282 | char *buf; | ||
| 283 | |||
| 284 | data_in = kmap(pages_in[0]); | ||
| 285 | tot_len = read_compress_length(data_in); | ||
| 286 | |||
| 287 | tot_in = LZO_LEN; | ||
| 288 | in_offset = LZO_LEN; | ||
| 289 | tot_len = min_t(size_t, srclen, tot_len); | ||
| 290 | in_page_bytes_left = PAGE_CACHE_SIZE - LZO_LEN; | ||
| 291 | |||
| 292 | tot_out = 0; | ||
| 293 | pg_offset = 0; | ||
| 294 | |||
| 295 | while (tot_in < tot_len) { | ||
| 296 | in_len = read_compress_length(data_in + in_offset); | ||
| 297 | in_page_bytes_left -= LZO_LEN; | ||
| 298 | in_offset += LZO_LEN; | ||
| 299 | tot_in += LZO_LEN; | ||
| 300 | |||
| 301 | tot_in += in_len; | ||
| 302 | working_bytes = in_len; | ||
| 303 | |||
| 304 | /* fast path: avoid using the working buffer */ | ||
| 305 | if (in_page_bytes_left >= in_len) { | ||
| 306 | buf = data_in + in_offset; | ||
| 307 | bytes = in_len; | ||
| 308 | goto cont; | ||
| 309 | } | ||
| 310 | |||
| 311 | /* copy bytes from the pages into the working buffer */ | ||
| 312 | buf = workspace->cbuf; | ||
| 313 | buf_offset = 0; | ||
| 314 | while (working_bytes) { | ||
| 315 | bytes = min(working_bytes, in_page_bytes_left); | ||
| 316 | |||
| 317 | memcpy(buf + buf_offset, data_in + in_offset, bytes); | ||
| 318 | buf_offset += bytes; | ||
| 319 | cont: | ||
| 320 | working_bytes -= bytes; | ||
| 321 | in_page_bytes_left -= bytes; | ||
| 322 | in_offset += bytes; | ||
| 323 | |||
| 324 | /* check if we need to pick another page */ | ||
| 325 | if ((working_bytes == 0 && in_page_bytes_left < LZO_LEN) | ||
| 326 | || in_page_bytes_left == 0) { | ||
| 327 | tot_in += in_page_bytes_left; | ||
| 328 | |||
| 329 | if (working_bytes == 0 && tot_in >= tot_len) | ||
| 330 | break; | ||
| 331 | |||
| 332 | kunmap(pages_in[page_in_index]); | ||
| 333 | page_in_index++; | ||
| 334 | if (page_in_index >= total_pages_in) { | ||
| 335 | ret = -1; | ||
| 336 | data_in = NULL; | ||
| 337 | goto done; | ||
| 338 | } | ||
| 339 | data_in = kmap(pages_in[page_in_index]); | ||
| 340 | |||
| 341 | in_page_bytes_left = PAGE_CACHE_SIZE; | ||
| 342 | in_offset = 0; | ||
| 343 | } | ||
| 344 | } | ||
| 345 | |||
| 346 | out_len = lzo1x_worst_compress(PAGE_CACHE_SIZE); | ||
| 347 | ret = lzo1x_decompress_safe(buf, in_len, workspace->buf, | ||
| 348 | &out_len); | ||
| 349 | if (ret != LZO_E_OK) { | ||
| 350 | printk(KERN_WARNING "btrfs decompress failed\n"); | ||
| 351 | ret = -1; | ||
| 352 | break; | ||
| 353 | } | ||
| 354 | |||
| 355 | buf_start = tot_out; | ||
| 356 | tot_out += out_len; | ||
| 357 | |||
| 358 | ret2 = btrfs_decompress_buf2page(workspace->buf, buf_start, | ||
| 359 | tot_out, disk_start, | ||
| 360 | bvec, vcnt, | ||
| 361 | &page_out_index, &pg_offset); | ||
| 362 | if (ret2 == 0) | ||
| 363 | break; | ||
| 364 | } | ||
| 365 | done: | ||
| 366 | if (data_in) | ||
| 367 | kunmap(pages_in[page_in_index]); | ||
| 368 | return ret; | ||
| 369 | } | ||
| 370 | |||
| 371 | static int lzo_decompress(struct list_head *ws, unsigned char *data_in, | ||
| 372 | struct page *dest_page, | ||
| 373 | unsigned long start_byte, | ||
| 374 | size_t srclen, size_t destlen) | ||
| 375 | { | ||
| 376 | struct workspace *workspace = list_entry(ws, struct workspace, list); | ||
| 377 | size_t in_len; | ||
| 378 | size_t out_len; | ||
| 379 | size_t tot_len; | ||
| 380 | int ret = 0; | ||
| 381 | char *kaddr; | ||
| 382 | unsigned long bytes; | ||
| 383 | |||
| 384 | BUG_ON(srclen < LZO_LEN); | ||
| 385 | |||
| 386 | tot_len = read_compress_length(data_in); | ||
| 387 | data_in += LZO_LEN; | ||
| 388 | |||
| 389 | in_len = read_compress_length(data_in); | ||
| 390 | data_in += LZO_LEN; | ||
| 391 | |||
| 392 | out_len = PAGE_CACHE_SIZE; | ||
| 393 | ret = lzo1x_decompress_safe(data_in, in_len, workspace->buf, &out_len); | ||
| 394 | if (ret != LZO_E_OK) { | ||
| 395 | printk(KERN_WARNING "btrfs decompress failed!\n"); | ||
| 396 | ret = -1; | ||
| 397 | goto out; | ||
| 398 | } | ||
| 399 | |||
| 400 | if (out_len < start_byte) { | ||
| 401 | ret = -1; | ||
| 402 | goto out; | ||
| 403 | } | ||
| 404 | |||
| 405 | bytes = min_t(unsigned long, destlen, out_len - start_byte); | ||
| 406 | |||
| 407 | kaddr = kmap_atomic(dest_page, KM_USER0); | ||
| 408 | memcpy(kaddr, workspace->buf + start_byte, bytes); | ||
| 409 | kunmap_atomic(kaddr, KM_USER0); | ||
| 410 | out: | ||
| 411 | return ret; | ||
| 412 | } | ||
| 413 | |||
| 414 | struct btrfs_compress_op btrfs_lzo_compress = { | ||
| 415 | .alloc_workspace = lzo_alloc_workspace, | ||
| 416 | .free_workspace = lzo_free_workspace, | ||
| 417 | .compress_pages = lzo_compress_pages, | ||
| 418 | .decompress_biovec = lzo_decompress_biovec, | ||
| 419 | .decompress = lzo_decompress, | ||
| 420 | }; | ||
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index ae7737e352c9..2b61e1ddcd99 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c | |||
| @@ -172,7 +172,7 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree, | |||
| 172 | */ | 172 | */ |
| 173 | static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | 173 | static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, |
| 174 | u64 start, u64 len, u64 disk_len, | 174 | u64 start, u64 len, u64 disk_len, |
| 175 | int type, int dio) | 175 | int type, int dio, int compress_type) |
| 176 | { | 176 | { |
| 177 | struct btrfs_ordered_inode_tree *tree; | 177 | struct btrfs_ordered_inode_tree *tree; |
| 178 | struct rb_node *node; | 178 | struct rb_node *node; |
| @@ -189,6 +189,7 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | |||
| 189 | entry->disk_len = disk_len; | 189 | entry->disk_len = disk_len; |
| 190 | entry->bytes_left = len; | 190 | entry->bytes_left = len; |
| 191 | entry->inode = inode; | 191 | entry->inode = inode; |
| 192 | entry->compress_type = compress_type; | ||
| 192 | if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) | 193 | if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) |
| 193 | set_bit(type, &entry->flags); | 194 | set_bit(type, &entry->flags); |
| 194 | 195 | ||
| @@ -220,14 +221,25 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | |||
| 220 | u64 start, u64 len, u64 disk_len, int type) | 221 | u64 start, u64 len, u64 disk_len, int type) |
| 221 | { | 222 | { |
| 222 | return __btrfs_add_ordered_extent(inode, file_offset, start, len, | 223 | return __btrfs_add_ordered_extent(inode, file_offset, start, len, |
| 223 | disk_len, type, 0); | 224 | disk_len, type, 0, |
| 225 | BTRFS_COMPRESS_NONE); | ||
| 224 | } | 226 | } |
| 225 | 227 | ||
| 226 | int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, | 228 | int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, |
| 227 | u64 start, u64 len, u64 disk_len, int type) | 229 | u64 start, u64 len, u64 disk_len, int type) |
| 228 | { | 230 | { |
| 229 | return __btrfs_add_ordered_extent(inode, file_offset, start, len, | 231 | return __btrfs_add_ordered_extent(inode, file_offset, start, len, |
| 230 | disk_len, type, 1); | 232 | disk_len, type, 1, |
| 233 | BTRFS_COMPRESS_NONE); | ||
| 234 | } | ||
| 235 | |||
| 236 | int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset, | ||
| 237 | u64 start, u64 len, u64 disk_len, | ||
| 238 | int type, int compress_type) | ||
| 239 | { | ||
| 240 | return __btrfs_add_ordered_extent(inode, file_offset, start, len, | ||
| 241 | disk_len, type, 0, | ||
| 242 | compress_type); | ||
| 231 | } | 243 | } |
| 232 | 244 | ||
| 233 | /* | 245 | /* |
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 61dca83119dd..ff1f69aa1883 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h | |||
| @@ -68,7 +68,7 @@ struct btrfs_ordered_sum { | |||
| 68 | 68 | ||
| 69 | #define BTRFS_ORDERED_NOCOW 2 /* set when we want to write in place */ | 69 | #define BTRFS_ORDERED_NOCOW 2 /* set when we want to write in place */ |
| 70 | 70 | ||
| 71 | #define BTRFS_ORDERED_COMPRESSED 3 /* writing a compressed extent */ | 71 | #define BTRFS_ORDERED_COMPRESSED 3 /* writing a zlib compressed extent */ |
| 72 | 72 | ||
| 73 | #define BTRFS_ORDERED_PREALLOC 4 /* set when writing to prealloced extent */ | 73 | #define BTRFS_ORDERED_PREALLOC 4 /* set when writing to prealloced extent */ |
| 74 | 74 | ||
| @@ -93,6 +93,9 @@ struct btrfs_ordered_extent { | |||
| 93 | /* flags (described above) */ | 93 | /* flags (described above) */ |
| 94 | unsigned long flags; | 94 | unsigned long flags; |
| 95 | 95 | ||
| 96 | /* compression algorithm */ | ||
| 97 | int compress_type; | ||
| 98 | |||
| 96 | /* reference count */ | 99 | /* reference count */ |
| 97 | atomic_t refs; | 100 | atomic_t refs; |
| 98 | 101 | ||
| @@ -148,6 +151,9 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | |||
| 148 | u64 start, u64 len, u64 disk_len, int type); | 151 | u64 start, u64 len, u64 disk_len, int type); |
| 149 | int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, | 152 | int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, |
| 150 | u64 start, u64 len, u64 disk_len, int type); | 153 | u64 start, u64 len, u64 disk_len, int type); |
| 154 | int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset, | ||
| 155 | u64 start, u64 len, u64 disk_len, | ||
| 156 | int type, int compress_type); | ||
| 151 | int btrfs_add_ordered_sum(struct inode *inode, | 157 | int btrfs_add_ordered_sum(struct inode *inode, |
| 152 | struct btrfs_ordered_extent *entry, | 158 | struct btrfs_ordered_extent *entry, |
| 153 | struct btrfs_ordered_sum *sum); | 159 | struct btrfs_ordered_sum *sum); |
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 22acdaa78ce1..b2130c46fdb5 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
| @@ -54,6 +54,90 @@ | |||
| 54 | 54 | ||
| 55 | static const struct super_operations btrfs_super_ops; | 55 | static const struct super_operations btrfs_super_ops; |
| 56 | 56 | ||
| 57 | static const char *btrfs_decode_error(struct btrfs_fs_info *fs_info, int errno, | ||
| 58 | char nbuf[16]) | ||
| 59 | { | ||
| 60 | char *errstr = NULL; | ||
| 61 | |||
| 62 | switch (errno) { | ||
| 63 | case -EIO: | ||
| 64 | errstr = "IO failure"; | ||
| 65 | break; | ||
| 66 | case -ENOMEM: | ||
| 67 | errstr = "Out of memory"; | ||
| 68 | break; | ||
| 69 | case -EROFS: | ||
| 70 | errstr = "Readonly filesystem"; | ||
| 71 | break; | ||
| 72 | default: | ||
| 73 | if (nbuf) { | ||
| 74 | if (snprintf(nbuf, 16, "error %d", -errno) >= 0) | ||
| 75 | errstr = nbuf; | ||
| 76 | } | ||
| 77 | break; | ||
| 78 | } | ||
| 79 | |||
| 80 | return errstr; | ||
| 81 | } | ||
| 82 | |||
| 83 | static void __save_error_info(struct btrfs_fs_info *fs_info) | ||
| 84 | { | ||
| 85 | /* | ||
| 86 | * today we only save the error info into ram. Long term we'll | ||
| 87 | * also send it down to the disk | ||
| 88 | */ | ||
| 89 | fs_info->fs_state = BTRFS_SUPER_FLAG_ERROR; | ||
| 90 | } | ||
| 91 | |||
| 92 | /* NOTE: | ||
| 93 | * The write_super work is done at umount time in order to avoid a | ||
| 94 | * deadlock, since umount holds all the locks. | ||
| 95 | */ | ||
| 96 | static void save_error_info(struct btrfs_fs_info *fs_info) | ||
| 97 | { | ||
| 98 | __save_error_info(fs_info); | ||
| 99 | } | ||
| 100 | |||
| 101 | /* btrfs handles errors by forcing the filesystem read-only */ | ||
| 102 | static void btrfs_handle_error(struct btrfs_fs_info *fs_info) | ||
| 103 | { | ||
| 104 | struct super_block *sb = fs_info->sb; | ||
| 105 | |||
| 106 | if (sb->s_flags & MS_RDONLY) | ||
| 107 | return; | ||
| 108 | |||
| 109 | if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { | ||
| 110 | sb->s_flags |= MS_RDONLY; | ||
| 111 | printk(KERN_INFO "btrfs is forced readonly\n"); | ||
| 112 | } | ||
| 113 | } | ||
| 114 | |||
| 115 | /* | ||
| 116 | * __btrfs_std_error decodes expected errors from the caller and | ||
| 117 | * invokes the appropriate error response. | ||
| 118 | */ | ||
| 119 | void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function, | ||
| 120 | unsigned int line, int errno) | ||
| 121 | { | ||
| 122 | struct super_block *sb = fs_info->sb; | ||
| 123 | char nbuf[16]; | ||
| 124 | const char *errstr; | ||
| 125 | |||
| 126 | /* | ||
| 127 | * Special case: if the error is EROFS, and we're already | ||
| 128 | * under MS_RDONLY, then it is safe here. | ||
| 129 | */ | ||
| 130 | if (errno == -EROFS && (sb->s_flags & MS_RDONLY)) | ||
| 131 | return; | ||
| 132 | |||
| 133 | errstr = btrfs_decode_error(fs_info, errno, nbuf); | ||
| 134 | printk(KERN_CRIT "BTRFS error (device %s) in %s:%d: %s\n", | ||
| 135 | sb->s_id, function, line, errstr); | ||
| 136 | save_error_info(fs_info); | ||
| 137 | |||
| 138 | btrfs_handle_error(fs_info); | ||
| 139 | } | ||
| 140 | |||
| 57 | static void btrfs_put_super(struct super_block *sb) | 141 | static void btrfs_put_super(struct super_block *sb) |
| 58 | { | 142 | { |
| 59 | struct btrfs_root *root = btrfs_sb(sb); | 143 | struct btrfs_root *root = btrfs_sb(sb); |
| @@ -69,9 +153,9 @@ enum { | |||
| 69 | Opt_degraded, Opt_subvol, Opt_subvolid, Opt_device, Opt_nodatasum, | 153 | Opt_degraded, Opt_subvol, Opt_subvolid, Opt_device, Opt_nodatasum, |
| 70 | Opt_nodatacow, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd, | 154 | Opt_nodatacow, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd, |
| 71 | Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress, | 155 | Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress, |
| 72 | Opt_compress_force, Opt_notreelog, Opt_ratio, Opt_flushoncommit, | 156 | Opt_compress_type, Opt_compress_force, Opt_compress_force_type, |
| 73 | Opt_discard, Opt_space_cache, Opt_clear_cache, Opt_err, | 157 | Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard, |
| 74 | Opt_user_subvol_rm_allowed, | 158 | Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, Opt_err, |
| 75 | }; | 159 | }; |
| 76 | 160 | ||
| 77 | static match_table_t tokens = { | 161 | static match_table_t tokens = { |
| @@ -86,7 +170,9 @@ static match_table_t tokens = { | |||
| 86 | {Opt_alloc_start, "alloc_start=%s"}, | 170 | {Opt_alloc_start, "alloc_start=%s"}, |
| 87 | {Opt_thread_pool, "thread_pool=%d"}, | 171 | {Opt_thread_pool, "thread_pool=%d"}, |
| 88 | {Opt_compress, "compress"}, | 172 | {Opt_compress, "compress"}, |
| 173 | {Opt_compress_type, "compress=%s"}, | ||
| 89 | {Opt_compress_force, "compress-force"}, | 174 | {Opt_compress_force, "compress-force"}, |
| 175 | {Opt_compress_force_type, "compress-force=%s"}, | ||
| 90 | {Opt_ssd, "ssd"}, | 176 | {Opt_ssd, "ssd"}, |
| 91 | {Opt_ssd_spread, "ssd_spread"}, | 177 | {Opt_ssd_spread, "ssd_spread"}, |
| 92 | {Opt_nossd, "nossd"}, | 178 | {Opt_nossd, "nossd"}, |
| @@ -112,6 +198,8 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
| 112 | char *p, *num, *orig; | 198 | char *p, *num, *orig; |
| 113 | int intarg; | 199 | int intarg; |
| 114 | int ret = 0; | 200 | int ret = 0; |
| 201 | char *compress_type; | ||
| 202 | bool compress_force = false; | ||
| 115 | 203 | ||
| 116 | if (!options) | 204 | if (!options) |
| 117 | return 0; | 205 | return 0; |
| @@ -154,14 +242,32 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
| 154 | btrfs_set_opt(info->mount_opt, NODATACOW); | 242 | btrfs_set_opt(info->mount_opt, NODATACOW); |
| 155 | btrfs_set_opt(info->mount_opt, NODATASUM); | 243 | btrfs_set_opt(info->mount_opt, NODATASUM); |
| 156 | break; | 244 | break; |
| 157 | case Opt_compress: | ||
| 158 | printk(KERN_INFO "btrfs: use compression\n"); | ||
| 159 | btrfs_set_opt(info->mount_opt, COMPRESS); | ||
| 160 | break; | ||
| 161 | case Opt_compress_force: | 245 | case Opt_compress_force: |
| 162 | printk(KERN_INFO "btrfs: forcing compression\n"); | 246 | case Opt_compress_force_type: |
| 163 | btrfs_set_opt(info->mount_opt, FORCE_COMPRESS); | 247 | compress_force = true; |
| 248 | case Opt_compress: | ||
| 249 | case Opt_compress_type: | ||
| 250 | if (token == Opt_compress || | ||
| 251 | token == Opt_compress_force || | ||
| 252 | strcmp(args[0].from, "zlib") == 0) { | ||
| 253 | compress_type = "zlib"; | ||
| 254 | info->compress_type = BTRFS_COMPRESS_ZLIB; | ||
| 255 | } else if (strcmp(args[0].from, "lzo") == 0) { | ||
| 256 | compress_type = "lzo"; | ||
| 257 | info->compress_type = BTRFS_COMPRESS_LZO; | ||
| 258 | } else { | ||
| 259 | ret = -EINVAL; | ||
| 260 | goto out; | ||
| 261 | } | ||
| 262 | |||
| 164 | btrfs_set_opt(info->mount_opt, COMPRESS); | 263 | btrfs_set_opt(info->mount_opt, COMPRESS); |
| 264 | if (compress_force) { | ||
| 265 | btrfs_set_opt(info->mount_opt, FORCE_COMPRESS); | ||
| 266 | pr_info("btrfs: force %s compression\n", | ||
| 267 | compress_type); | ||
| 268 | } else | ||
| 269 | pr_info("btrfs: use %s compression\n", | ||
| 270 | compress_type); | ||
| 165 | break; | 271 | break; |
| 166 | case Opt_ssd: | 272 | case Opt_ssd: |
| 167 | printk(KERN_INFO "btrfs: use ssd allocation scheme\n"); | 273 | printk(KERN_INFO "btrfs: use ssd allocation scheme\n"); |
| @@ -753,6 +859,127 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) | |||
| 753 | return 0; | 859 | return 0; |
| 754 | } | 860 | } |
| 755 | 861 | ||
| 862 | /* | ||
| 863 | * The helper to calc the free space on the devices that can be used to store | ||
| 864 | * file data. | ||
| 865 | */ | ||
| 866 | static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes) | ||
| 867 | { | ||
| 868 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
| 869 | struct btrfs_device_info *devices_info; | ||
| 870 | struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; | ||
| 871 | struct btrfs_device *device; | ||
| 872 | u64 skip_space; | ||
| 873 | u64 type; | ||
| 874 | u64 avail_space; | ||
| 875 | u64 used_space; | ||
| 876 | u64 min_stripe_size; | ||
| 877 | int min_stripes = 1; | ||
| 878 | int i = 0, nr_devices; | ||
| 879 | int ret; | ||
| 880 | |||
| 881 | nr_devices = fs_info->fs_devices->rw_devices; | ||
| 882 | BUG_ON(!nr_devices); | ||
| 883 | |||
| 884 | devices_info = kmalloc(sizeof(*devices_info) * nr_devices, | ||
| 885 | GFP_NOFS); | ||
| 886 | if (!devices_info) | ||
| 887 | return -ENOMEM; | ||
| 888 | |||
| 889 | /* calc min stripe number for data space allocation */ | ||
| 890 | type = btrfs_get_alloc_profile(root, 1); | ||
| 891 | if (type & BTRFS_BLOCK_GROUP_RAID0) | ||
| 892 | min_stripes = 2; | ||
| 893 | else if (type & BTRFS_BLOCK_GROUP_RAID1) | ||
| 894 | min_stripes = 2; | ||
| 895 | else if (type & BTRFS_BLOCK_GROUP_RAID10) | ||
| 896 | min_stripes = 4; | ||
| 897 | |||
| 898 | if (type & BTRFS_BLOCK_GROUP_DUP) | ||
| 899 | min_stripe_size = 2 * BTRFS_STRIPE_LEN; | ||
| 900 | else | ||
| 901 | min_stripe_size = BTRFS_STRIPE_LEN; | ||
| 902 | |||
| 903 | list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { | ||
| 904 | if (!device->in_fs_metadata) | ||
| 905 | continue; | ||
| 906 | |||
| 907 | avail_space = device->total_bytes - device->bytes_used; | ||
| 908 | |||
| 909 | /* align with stripe_len */ | ||
| 910 | do_div(avail_space, BTRFS_STRIPE_LEN); | ||
| 911 | avail_space *= BTRFS_STRIPE_LEN; | ||
| 912 | |||
| 913 | /* | ||
| 914 | * In order to avoid overwriting the superblock on the drive, | ||
| 915 | * btrfs starts at an offset of at least 1MB when doing chunk | ||
| 916 | * allocation. | ||
| 917 | */ | ||
| 918 | skip_space = 1024 * 1024; | ||
| 919 | |||
| 920 | /* user can set the offset in fs_info->alloc_start. */ | ||
| 921 | if (fs_info->alloc_start + BTRFS_STRIPE_LEN <= | ||
| 922 | device->total_bytes) | ||
| 923 | skip_space = max(fs_info->alloc_start, skip_space); | ||
| 924 | |||
| 925 | /* | ||
| 926 | * btrfs can not use the free space in [0, skip_space - 1], | ||
| 927 | * we must subtract it from the total. In order to implement | ||
| 928 | * it, we account the used space in this range first. | ||
| 929 | */ | ||
| 930 | ret = btrfs_account_dev_extents_size(device, 0, skip_space - 1, | ||
| 931 | &used_space); | ||
| 932 | if (ret) { | ||
| 933 | kfree(devices_info); | ||
| 934 | return ret; | ||
| 935 | } | ||
| 936 | |||
| 937 | /* calc the free space in [0, skip_space - 1] */ | ||
| 938 | skip_space -= used_space; | ||
| 939 | |||
| 940 | /* | ||
| 941 | * we can not use the free space in [0, skip_space - 1], subtract | ||
| 942 | * it from the total. | ||
| 943 | */ | ||
| 944 | if (avail_space && avail_space >= skip_space) | ||
| 945 | avail_space -= skip_space; | ||
| 946 | else | ||
| 947 | avail_space = 0; | ||
| 948 | |||
| 949 | if (avail_space < min_stripe_size) | ||
| 950 | continue; | ||
| 951 | |||
| 952 | devices_info[i].dev = device; | ||
| 953 | devices_info[i].max_avail = avail_space; | ||
| 954 | |||
| 955 | i++; | ||
| 956 | } | ||
| 957 | |||
| 958 | nr_devices = i; | ||
| 959 | |||
| 960 | btrfs_descending_sort_devices(devices_info, nr_devices); | ||
| 961 | |||
| 962 | i = nr_devices - 1; | ||
| 963 | avail_space = 0; | ||
| 964 | while (nr_devices >= min_stripes) { | ||
| 965 | if (devices_info[i].max_avail >= min_stripe_size) { | ||
| 966 | int j; | ||
| 967 | u64 alloc_size; | ||
| 968 | |||
| 969 | avail_space += devices_info[i].max_avail * min_stripes; | ||
| 970 | alloc_size = devices_info[i].max_avail; | ||
| 971 | for (j = i + 1 - min_stripes; j <= i; j++) | ||
| 972 | devices_info[j].max_avail -= alloc_size; | ||
| 973 | } | ||
| 974 | i--; | ||
| 975 | nr_devices--; | ||
| 976 | } | ||
| 977 | |||
| 978 | kfree(devices_info); | ||
| 979 | *free_bytes = avail_space; | ||
| 980 | return 0; | ||
| 981 | } | ||
| 982 | |||
| 756 | static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) | 983 | static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) |
| 757 | { | 984 | { |
| 758 | struct btrfs_root *root = btrfs_sb(dentry->d_sb); | 985 | struct btrfs_root *root = btrfs_sb(dentry->d_sb); |
| @@ -760,17 +987,21 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
| 760 | struct list_head *head = &root->fs_info->space_info; | 987 | struct list_head *head = &root->fs_info->space_info; |
| 761 | struct btrfs_space_info *found; | 988 | struct btrfs_space_info *found; |
| 762 | u64 total_used = 0; | 989 | u64 total_used = 0; |
| 763 | u64 total_used_data = 0; | 990 | u64 total_free_data = 0; |
| 764 | int bits = dentry->d_sb->s_blocksize_bits; | 991 | int bits = dentry->d_sb->s_blocksize_bits; |
| 765 | __be32 *fsid = (__be32 *)root->fs_info->fsid; | 992 | __be32 *fsid = (__be32 *)root->fs_info->fsid; |
| 993 | int ret; | ||
| 766 | 994 | ||
| 995 | /* holding chunk_mutex to avoid allocating new chunks */ | ||
| 996 | mutex_lock(&root->fs_info->chunk_mutex); | ||
| 767 | rcu_read_lock(); | 997 | rcu_read_lock(); |
| 768 | list_for_each_entry_rcu(found, head, list) { | 998 | list_for_each_entry_rcu(found, head, list) { |
| 769 | if (found->flags & (BTRFS_BLOCK_GROUP_METADATA | | 999 | if (found->flags & BTRFS_BLOCK_GROUP_DATA) { |
| 770 | BTRFS_BLOCK_GROUP_SYSTEM)) | 1000 | total_free_data += found->disk_total - found->disk_used; |
| 771 | total_used_data += found->disk_total; | 1001 | total_free_data -= |
| 772 | else | 1002 | btrfs_account_ro_block_groups_free_space(found); |
| 773 | total_used_data += found->disk_used; | 1003 | } |
| 1004 | |||
| 774 | total_used += found->disk_used; | 1005 | total_used += found->disk_used; |
| 775 | } | 1006 | } |
| 776 | rcu_read_unlock(); | 1007 | rcu_read_unlock(); |
| @@ -778,9 +1009,17 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
| 778 | buf->f_namelen = BTRFS_NAME_LEN; | 1009 | buf->f_namelen = BTRFS_NAME_LEN; |
| 779 | buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits; | 1010 | buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits; |
| 780 | buf->f_bfree = buf->f_blocks - (total_used >> bits); | 1011 | buf->f_bfree = buf->f_blocks - (total_used >> bits); |
| 781 | buf->f_bavail = buf->f_blocks - (total_used_data >> bits); | ||
| 782 | buf->f_bsize = dentry->d_sb->s_blocksize; | 1012 | buf->f_bsize = dentry->d_sb->s_blocksize; |
| 783 | buf->f_type = BTRFS_SUPER_MAGIC; | 1013 | buf->f_type = BTRFS_SUPER_MAGIC; |
| 1014 | buf->f_bavail = total_free_data; | ||
| 1015 | ret = btrfs_calc_avail_data_space(root, &total_free_data); | ||
| 1016 | if (ret) { | ||
| 1017 | mutex_unlock(&root->fs_info->chunk_mutex); | ||
| 1018 | return ret; | ||
| 1019 | } | ||
| 1020 | buf->f_bavail += total_free_data; | ||
| 1021 | buf->f_bavail = buf->f_bavail >> bits; | ||
| 1022 | mutex_unlock(&root->fs_info->chunk_mutex); | ||
| 784 | 1023 | ||
| 785 | /* We treat it as constant endianness (it doesn't matter _which_) | 1024 | /* We treat it as constant endianness (it doesn't matter _which_) |
| 786 | because we want the fsid to come out the same whether mounted | 1025 | because we want the fsid to come out the same whether mounted |
| @@ -897,10 +1136,14 @@ static int __init init_btrfs_fs(void) | |||
| 897 | if (err) | 1136 | if (err) |
| 898 | return err; | 1137 | return err; |
| 899 | 1138 | ||
| 900 | err = btrfs_init_cachep(); | 1139 | err = btrfs_init_compress(); |
| 901 | if (err) | 1140 | if (err) |
| 902 | goto free_sysfs; | 1141 | goto free_sysfs; |
| 903 | 1142 | ||
| 1143 | err = btrfs_init_cachep(); | ||
| 1144 | if (err) | ||
| 1145 | goto free_compress; | ||
| 1146 | |||
| 904 | err = extent_io_init(); | 1147 | err = extent_io_init(); |
| 905 | if (err) | 1148 | if (err) |
| 906 | goto free_cachep; | 1149 | goto free_cachep; |
| @@ -928,6 +1171,8 @@ free_extent_io: | |||
| 928 | extent_io_exit(); | 1171 | extent_io_exit(); |
| 929 | free_cachep: | 1172 | free_cachep: |
| 930 | btrfs_destroy_cachep(); | 1173 | btrfs_destroy_cachep(); |
| 1174 | free_compress: | ||
| 1175 | btrfs_exit_compress(); | ||
| 931 | free_sysfs: | 1176 | free_sysfs: |
| 932 | btrfs_exit_sysfs(); | 1177 | btrfs_exit_sysfs(); |
| 933 | return err; | 1178 | return err; |
| @@ -942,7 +1187,7 @@ static void __exit exit_btrfs_fs(void) | |||
| 942 | unregister_filesystem(&btrfs_fs_type); | 1187 | unregister_filesystem(&btrfs_fs_type); |
| 943 | btrfs_exit_sysfs(); | 1188 | btrfs_exit_sysfs(); |
| 944 | btrfs_cleanup_fs_uuids(); | 1189 | btrfs_cleanup_fs_uuids(); |
| 945 | btrfs_zlib_exit(); | 1190 | btrfs_exit_compress(); |
| 946 | } | 1191 | } |
| 947 | 1192 | ||
| 948 | module_init(init_btrfs_fs) | 1193 | module_init(init_btrfs_fs) |
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index f50e931fc217..bae5c7b8bbe2 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
| @@ -181,6 +181,9 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, | |||
| 181 | struct btrfs_trans_handle *h; | 181 | struct btrfs_trans_handle *h; |
| 182 | struct btrfs_transaction *cur_trans; | 182 | struct btrfs_transaction *cur_trans; |
| 183 | int ret; | 183 | int ret; |
| 184 | |||
| 185 | if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) | ||
| 186 | return ERR_PTR(-EROFS); | ||
| 184 | again: | 187 | again: |
| 185 | h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); | 188 | h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); |
| 186 | if (!h) | 189 | if (!h) |
| @@ -910,6 +913,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
| 910 | u64 to_reserve = 0; | 913 | u64 to_reserve = 0; |
| 911 | u64 index = 0; | 914 | u64 index = 0; |
| 912 | u64 objectid; | 915 | u64 objectid; |
| 916 | u64 root_flags; | ||
| 913 | 917 | ||
| 914 | new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS); | 918 | new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS); |
| 915 | if (!new_root_item) { | 919 | if (!new_root_item) { |
| @@ -967,6 +971,13 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
| 967 | btrfs_set_root_last_snapshot(&root->root_item, trans->transid); | 971 | btrfs_set_root_last_snapshot(&root->root_item, trans->transid); |
| 968 | memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); | 972 | memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); |
| 969 | 973 | ||
| 974 | root_flags = btrfs_root_flags(new_root_item); | ||
| 975 | if (pending->readonly) | ||
| 976 | root_flags |= BTRFS_ROOT_SUBVOL_RDONLY; | ||
| 977 | else | ||
| 978 | root_flags &= ~BTRFS_ROOT_SUBVOL_RDONLY; | ||
| 979 | btrfs_set_root_flags(new_root_item, root_flags); | ||
| 980 | |||
| 970 | old = btrfs_lock_root_node(root); | 981 | old = btrfs_lock_root_node(root); |
| 971 | btrfs_cow_block(trans, root, old, NULL, 0, &old); | 982 | btrfs_cow_block(trans, root, old, NULL, 0, &old); |
| 972 | btrfs_set_lock_blocking(old); | 983 | btrfs_set_lock_blocking(old); |
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index f104b57ad4ef..229a594cacd5 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h | |||
| @@ -62,6 +62,7 @@ struct btrfs_pending_snapshot { | |||
| 62 | struct btrfs_block_rsv block_rsv; | 62 | struct btrfs_block_rsv block_rsv; |
| 63 | /* extra metadata reservation for relocation */ | 63 | /* extra metadata reservation for relocation */ |
| 64 | int error; | 64 | int error; |
| 65 | bool readonly; | ||
| 65 | struct list_head list; | 66 | struct list_head list; |
| 66 | }; | 67 | }; |
| 67 | 68 | ||
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 1718e1a5c320..d158530233b7 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
| @@ -22,6 +22,7 @@ | |||
| 22 | #include <linux/blkdev.h> | 22 | #include <linux/blkdev.h> |
| 23 | #include <linux/random.h> | 23 | #include <linux/random.h> |
| 24 | #include <linux/iocontext.h> | 24 | #include <linux/iocontext.h> |
| 25 | #include <linux/capability.h> | ||
| 25 | #include <asm/div64.h> | 26 | #include <asm/div64.h> |
| 26 | #include "compat.h" | 27 | #include "compat.h" |
| 27 | #include "ctree.h" | 28 | #include "ctree.h" |
| @@ -600,8 +601,10 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, | |||
| 600 | set_blocksize(bdev, 4096); | 601 | set_blocksize(bdev, 4096); |
| 601 | 602 | ||
| 602 | bh = btrfs_read_dev_super(bdev); | 603 | bh = btrfs_read_dev_super(bdev); |
| 603 | if (!bh) | 604 | if (!bh) { |
| 605 | ret = -EINVAL; | ||
| 604 | goto error_close; | 606 | goto error_close; |
| 607 | } | ||
| 605 | 608 | ||
| 606 | disk_super = (struct btrfs_super_block *)bh->b_data; | 609 | disk_super = (struct btrfs_super_block *)bh->b_data; |
| 607 | devid = btrfs_stack_device_id(&disk_super->dev_item); | 610 | devid = btrfs_stack_device_id(&disk_super->dev_item); |
| @@ -703,7 +706,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, | |||
| 703 | goto error_close; | 706 | goto error_close; |
| 704 | bh = btrfs_read_dev_super(bdev); | 707 | bh = btrfs_read_dev_super(bdev); |
| 705 | if (!bh) { | 708 | if (!bh) { |
| 706 | ret = -EIO; | 709 | ret = -EINVAL; |
| 707 | goto error_close; | 710 | goto error_close; |
| 708 | } | 711 | } |
| 709 | disk_super = (struct btrfs_super_block *)bh->b_data; | 712 | disk_super = (struct btrfs_super_block *)bh->b_data; |
| @@ -729,59 +732,167 @@ error: | |||
| 729 | return ret; | 732 | return ret; |
| 730 | } | 733 | } |
| 731 | 734 | ||
| 735 | /* helper to account the used device space in the range */ | ||
| 736 | int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start, | ||
| 737 | u64 end, u64 *length) | ||
| 738 | { | ||
| 739 | struct btrfs_key key; | ||
| 740 | struct btrfs_root *root = device->dev_root; | ||
| 741 | struct btrfs_dev_extent *dev_extent; | ||
| 742 | struct btrfs_path *path; | ||
| 743 | u64 extent_end; | ||
| 744 | int ret; | ||
| 745 | int slot; | ||
| 746 | struct extent_buffer *l; | ||
| 747 | |||
| 748 | *length = 0; | ||
| 749 | |||
| 750 | if (start >= device->total_bytes) | ||
| 751 | return 0; | ||
| 752 | |||
| 753 | path = btrfs_alloc_path(); | ||
| 754 | if (!path) | ||
| 755 | return -ENOMEM; | ||
| 756 | path->reada = 2; | ||
| 757 | |||
| 758 | key.objectid = device->devid; | ||
| 759 | key.offset = start; | ||
| 760 | key.type = BTRFS_DEV_EXTENT_KEY; | ||
| 761 | |||
| 762 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
| 763 | if (ret < 0) | ||
| 764 | goto out; | ||
| 765 | if (ret > 0) { | ||
| 766 | ret = btrfs_previous_item(root, path, key.objectid, key.type); | ||
| 767 | if (ret < 0) | ||
| 768 | goto out; | ||
| 769 | } | ||
| 770 | |||
| 771 | while (1) { | ||
| 772 | l = path->nodes[0]; | ||
| 773 | slot = path->slots[0]; | ||
| 774 | if (slot >= btrfs_header_nritems(l)) { | ||
| 775 | ret = btrfs_next_leaf(root, path); | ||
| 776 | if (ret == 0) | ||
| 777 | continue; | ||
| 778 | if (ret < 0) | ||
| 779 | goto out; | ||
| 780 | |||
| 781 | break; | ||
| 782 | } | ||
| 783 | btrfs_item_key_to_cpu(l, &key, slot); | ||
| 784 | |||
| 785 | if (key.objectid < device->devid) | ||
| 786 | goto next; | ||
| 787 | |||
| 788 | if (key.objectid > device->devid) | ||
| 789 | break; | ||
| 790 | |||
| 791 | if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) | ||
| 792 | goto next; | ||
| 793 | |||
| 794 | dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); | ||
| 795 | extent_end = key.offset + btrfs_dev_extent_length(l, | ||
| 796 | dev_extent); | ||
| 797 | if (key.offset <= start && extent_end > end) { | ||
| 798 | *length = end - start + 1; | ||
| 799 | break; | ||
| 800 | } else if (key.offset <= start && extent_end > start) | ||
| 801 | *length += extent_end - start; | ||
| 802 | else if (key.offset > start && extent_end <= end) | ||
| 803 | *length += extent_end - key.offset; | ||
| 804 | else if (key.offset > start && key.offset <= end) { | ||
| 805 | *length += end - key.offset + 1; | ||
| 806 | break; | ||
| 807 | } else if (key.offset > end) | ||
| 808 | break; | ||
| 809 | |||
| 810 | next: | ||
| 811 | path->slots[0]++; | ||
| 812 | } | ||
| 813 | ret = 0; | ||
| 814 | out: | ||
| 815 | btrfs_free_path(path); | ||
| 816 | return ret; | ||
| 817 | } | ||
| 818 | |||
| 732 | /* | 819 | /* |
| 820 | * find_free_dev_extent - find free space in the specified device | ||
| 821 | * @trans: transaction handler | ||
| 822 | * @device: the device which we search the free space in | ||
| 823 | * @num_bytes: the size of the free space that we need | ||
| 824 | * @start: store the start of the free space. | ||
| 825 | * @len: the size of the free space that we find, or the size of the max | ||
| 826 | * free space if we don't find suitable free space | ||
| 827 | * | ||
| 733 | * this uses a pretty simple search, the expectation is that it is | 828 | * this uses a pretty simple search, the expectation is that it is |
| 734 | * called very infrequently and that a given device has a small number | 829 | * called very infrequently and that a given device has a small number |
| 735 | * of extents | 830 | * of extents |
| 831 | * | ||
| 832 | * @start is used to store the start of the free space if we find. But if we | ||
| 833 | * don't find suitable free space, it will be used to store the start position | ||
| 834 | * of the max free space. | ||
| 835 | * | ||
| 836 | * @len is used to store the size of the free space that we find. | ||
| 837 | * But if we don't find suitable free space, it is used to store the size of | ||
| 838 | * the max free space. | ||
| 736 | */ | 839 | */ |
| 737 | int find_free_dev_extent(struct btrfs_trans_handle *trans, | 840 | int find_free_dev_extent(struct btrfs_trans_handle *trans, |
| 738 | struct btrfs_device *device, u64 num_bytes, | 841 | struct btrfs_device *device, u64 num_bytes, |
| 739 | u64 *start, u64 *max_avail) | 842 | u64 *start, u64 *len) |
| 740 | { | 843 | { |
| 741 | struct btrfs_key key; | 844 | struct btrfs_key key; |
| 742 | struct btrfs_root *root = device->dev_root; | 845 | struct btrfs_root *root = device->dev_root; |
| 743 | struct btrfs_dev_extent *dev_extent = NULL; | 846 | struct btrfs_dev_extent *dev_extent; |
| 744 | struct btrfs_path *path; | 847 | struct btrfs_path *path; |
| 745 | u64 hole_size = 0; | 848 | u64 hole_size; |
| 746 | u64 last_byte = 0; | 849 | u64 max_hole_start; |
| 747 | u64 search_start = 0; | 850 | u64 max_hole_size; |
| 851 | u64 extent_end; | ||
| 852 | u64 search_start; | ||
| 748 | u64 search_end = device->total_bytes; | 853 | u64 search_end = device->total_bytes; |
| 749 | int ret; | 854 | int ret; |
| 750 | int slot = 0; | 855 | int slot; |
| 751 | int start_found; | ||
| 752 | struct extent_buffer *l; | 856 | struct extent_buffer *l; |
| 753 | 857 | ||
| 754 | path = btrfs_alloc_path(); | ||
| 755 | if (!path) | ||
| 756 | return -ENOMEM; | ||
| 757 | path->reada = 2; | ||
| 758 | start_found = 0; | ||
| 759 | |||
| 760 | /* FIXME use last free of some kind */ | 858 | /* FIXME use last free of some kind */ |
| 761 | 859 | ||
| 762 | /* we don't want to overwrite the superblock on the drive, | 860 | /* we don't want to overwrite the superblock on the drive, |
| 763 | * so we make sure to start at an offset of at least 1MB | 861 | * so we make sure to start at an offset of at least 1MB |
| 764 | */ | 862 | */ |
| 765 | search_start = max((u64)1024 * 1024, search_start); | 863 | search_start = 1024 * 1024; |
| 766 | 864 | ||
| 767 | if (root->fs_info->alloc_start + num_bytes <= device->total_bytes) | 865 | if (root->fs_info->alloc_start + num_bytes <= search_end) |
| 768 | search_start = max(root->fs_info->alloc_start, search_start); | 866 | search_start = max(root->fs_info->alloc_start, search_start); |
| 769 | 867 | ||
| 868 | max_hole_start = search_start; | ||
| 869 | max_hole_size = 0; | ||
| 870 | |||
| 871 | if (search_start >= search_end) { | ||
| 872 | ret = -ENOSPC; | ||
| 873 | goto error; | ||
| 874 | } | ||
| 875 | |||
| 876 | path = btrfs_alloc_path(); | ||
| 877 | if (!path) { | ||
| 878 | ret = -ENOMEM; | ||
| 879 | goto error; | ||
| 880 | } | ||
| 881 | path->reada = 2; | ||
| 882 | |||
| 770 | key.objectid = device->devid; | 883 | key.objectid = device->devid; |
| 771 | key.offset = search_start; | 884 | key.offset = search_start; |
| 772 | key.type = BTRFS_DEV_EXTENT_KEY; | 885 | key.type = BTRFS_DEV_EXTENT_KEY; |
| 886 | |||
| 773 | ret = btrfs_search_slot(trans, root, &key, path, 0, 0); | 887 | ret = btrfs_search_slot(trans, root, &key, path, 0, 0); |
| 774 | if (ret < 0) | 888 | if (ret < 0) |
| 775 | goto error; | 889 | goto out; |
| 776 | if (ret > 0) { | 890 | if (ret > 0) { |
| 777 | ret = btrfs_previous_item(root, path, key.objectid, key.type); | 891 | ret = btrfs_previous_item(root, path, key.objectid, key.type); |
| 778 | if (ret < 0) | 892 | if (ret < 0) |
| 779 | goto error; | 893 | goto out; |
| 780 | if (ret > 0) | ||
| 781 | start_found = 1; | ||
| 782 | } | 894 | } |
| 783 | l = path->nodes[0]; | 895 | |
| 784 | btrfs_item_key_to_cpu(l, &key, path->slots[0]); | ||
| 785 | while (1) { | 896 | while (1) { |
| 786 | l = path->nodes[0]; | 897 | l = path->nodes[0]; |
| 787 | slot = path->slots[0]; | 898 | slot = path->slots[0]; |
| @@ -790,24 +901,9 @@ int find_free_dev_extent(struct btrfs_trans_handle *trans, | |||
| 790 | if (ret == 0) | 901 | if (ret == 0) |
| 791 | continue; | 902 | continue; |
| 792 | if (ret < 0) | 903 | if (ret < 0) |
| 793 | goto error; | 904 | goto out; |
| 794 | no_more_items: | 905 | |
| 795 | if (!start_found) { | 906 | break; |
| 796 | if (search_start >= search_end) { | ||
| 797 | ret = -ENOSPC; | ||
| 798 | goto error; | ||
| 799 | } | ||
| 800 | *start = search_start; | ||
| 801 | start_found = 1; | ||
| 802 | goto check_pending; | ||
| 803 | } | ||
| 804 | *start = last_byte > search_start ? | ||
| 805 | last_byte : search_start; | ||
| 806 | if (search_end <= *start) { | ||
| 807 | ret = -ENOSPC; | ||
| 808 | goto error; | ||
| 809 | } | ||
| 810 | goto check_pending; | ||
| 811 | } | 907 | } |
| 812 | btrfs_item_key_to_cpu(l, &key, slot); | 908 | btrfs_item_key_to_cpu(l, &key, slot); |
| 813 | 909 | ||
| @@ -815,48 +911,62 @@ no_more_items: | |||
| 815 | goto next; | 911 | goto next; |
| 816 | 912 | ||
| 817 | if (key.objectid > device->devid) | 913 | if (key.objectid > device->devid) |
| 818 | goto no_more_items; | 914 | break; |
| 819 | 915 | ||
| 820 | if (key.offset >= search_start && key.offset > last_byte && | 916 | if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) |
| 821 | start_found) { | 917 | goto next; |
| 822 | if (last_byte < search_start) | ||
| 823 | last_byte = search_start; | ||
| 824 | hole_size = key.offset - last_byte; | ||
| 825 | 918 | ||
| 826 | if (hole_size > *max_avail) | 919 | if (key.offset > search_start) { |
| 827 | *max_avail = hole_size; | 920 | hole_size = key.offset - search_start; |
| 828 | 921 | ||
| 829 | if (key.offset > last_byte && | 922 | if (hole_size > max_hole_size) { |
| 830 | hole_size >= num_bytes) { | 923 | max_hole_start = search_start; |
| 831 | *start = last_byte; | 924 | max_hole_size = hole_size; |
| 832 | goto check_pending; | 925 | } |
| 926 | |||
| 927 | /* | ||
| 928 | * If this free space is greater than what we need, | ||
| 929 | * it must be the max free space that we have found | ||
| 930 | * until now, so max_hole_start must point to the start | ||
| 931 | * of this free space and the length of this free space | ||
| 932 | * is stored in max_hole_size. Thus, we return | ||
| 933 | * max_hole_start and max_hole_size and go back to the | ||
| 934 | * caller. | ||
| 935 | */ | ||
| 936 | if (hole_size >= num_bytes) { | ||
| 937 | ret = 0; | ||
| 938 | goto out; | ||
| 833 | } | 939 | } |
| 834 | } | 940 | } |
| 835 | if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) | ||
| 836 | goto next; | ||
| 837 | 941 | ||
| 838 | start_found = 1; | ||
| 839 | dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); | 942 | dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); |
| 840 | last_byte = key.offset + btrfs_dev_extent_length(l, dev_extent); | 943 | extent_end = key.offset + btrfs_dev_extent_length(l, |
| 944 | dev_extent); | ||
| 945 | if (extent_end > search_start) | ||
| 946 | search_start = extent_end; | ||
| 841 | next: | 947 | next: |
| 842 | path->slots[0]++; | 948 | path->slots[0]++; |
| 843 | cond_resched(); | 949 | cond_resched(); |
| 844 | } | 950 | } |
| 845 | check_pending: | ||
| 846 | /* we have to make sure we didn't find an extent that has already | ||
| 847 | * been allocated by the map tree or the original allocation | ||
| 848 | */ | ||
| 849 | BUG_ON(*start < search_start); | ||
| 850 | 951 | ||
| 851 | if (*start + num_bytes > search_end) { | 952 | hole_size = search_end- search_start; |
| 852 | ret = -ENOSPC; | 953 | if (hole_size > max_hole_size) { |
| 853 | goto error; | 954 | max_hole_start = search_start; |
| 955 | max_hole_size = hole_size; | ||
| 854 | } | 956 | } |
| 855 | /* check for pending inserts here */ | ||
| 856 | ret = 0; | ||
| 857 | 957 | ||
| 858 | error: | 958 | /* See above. */ |
| 959 | if (hole_size < num_bytes) | ||
| 960 | ret = -ENOSPC; | ||
| 961 | else | ||
| 962 | ret = 0; | ||
| 963 | |||
| 964 | out: | ||
| 859 | btrfs_free_path(path); | 965 | btrfs_free_path(path); |
| 966 | error: | ||
| 967 | *start = max_hole_start; | ||
| 968 | if (len) | ||
| 969 | *len = max_hole_size; | ||
| 860 | return ret; | 970 | return ret; |
| 861 | } | 971 | } |
| 862 | 972 | ||
| @@ -1196,7 +1306,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
| 1196 | set_blocksize(bdev, 4096); | 1306 | set_blocksize(bdev, 4096); |
| 1197 | bh = btrfs_read_dev_super(bdev); | 1307 | bh = btrfs_read_dev_super(bdev); |
| 1198 | if (!bh) { | 1308 | if (!bh) { |
| 1199 | ret = -EIO; | 1309 | ret = -EINVAL; |
| 1200 | goto error_close; | 1310 | goto error_close; |
| 1201 | } | 1311 | } |
| 1202 | disk_super = (struct btrfs_super_block *)bh->b_data; | 1312 | disk_super = (struct btrfs_super_block *)bh->b_data; |
| @@ -1916,6 +2026,9 @@ int btrfs_balance(struct btrfs_root *dev_root) | |||
| 1916 | if (dev_root->fs_info->sb->s_flags & MS_RDONLY) | 2026 | if (dev_root->fs_info->sb->s_flags & MS_RDONLY) |
| 1917 | return -EROFS; | 2027 | return -EROFS; |
| 1918 | 2028 | ||
| 2029 | if (!capable(CAP_SYS_ADMIN)) | ||
| 2030 | return -EPERM; | ||
| 2031 | |||
| 1919 | mutex_lock(&dev_root->fs_info->volume_mutex); | 2032 | mutex_lock(&dev_root->fs_info->volume_mutex); |
| 1920 | dev_root = dev_root->fs_info->dev_root; | 2033 | dev_root = dev_root->fs_info->dev_root; |
| 1921 | 2034 | ||
| @@ -2154,66 +2267,67 @@ static noinline u64 chunk_bytes_by_type(u64 type, u64 calc_size, | |||
| 2154 | return calc_size * num_stripes; | 2267 | return calc_size * num_stripes; |
| 2155 | } | 2268 | } |
| 2156 | 2269 | ||
| 2157 | static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | 2270 | /* Used to sort the devices by max_avail(descending sort) */ |
| 2158 | struct btrfs_root *extent_root, | 2271 | int btrfs_cmp_device_free_bytes(const void *dev_info1, const void *dev_info2) |
| 2159 | struct map_lookup **map_ret, | ||
| 2160 | u64 *num_bytes, u64 *stripe_size, | ||
| 2161 | u64 start, u64 type) | ||
| 2162 | { | 2272 | { |
| 2163 | struct btrfs_fs_info *info = extent_root->fs_info; | 2273 | if (((struct btrfs_device_info *)dev_info1)->max_avail > |
| 2164 | struct btrfs_device *device = NULL; | 2274 | ((struct btrfs_device_info *)dev_info2)->max_avail) |
| 2165 | struct btrfs_fs_devices *fs_devices = info->fs_devices; | 2275 | return -1; |
| 2166 | struct list_head *cur; | 2276 | else if (((struct btrfs_device_info *)dev_info1)->max_avail < |
| 2167 | struct map_lookup *map = NULL; | 2277 | ((struct btrfs_device_info *)dev_info2)->max_avail) |
| 2168 | struct extent_map_tree *em_tree; | 2278 | return 1; |
| 2169 | struct extent_map *em; | 2279 | else |
| 2170 | struct list_head private_devs; | 2280 | return 0; |
| 2171 | int min_stripe_size = 1 * 1024 * 1024; | 2281 | } |
| 2172 | u64 calc_size = 1024 * 1024 * 1024; | ||
| 2173 | u64 max_chunk_size = calc_size; | ||
| 2174 | u64 min_free; | ||
| 2175 | u64 avail; | ||
| 2176 | u64 max_avail = 0; | ||
| 2177 | u64 dev_offset; | ||
| 2178 | int num_stripes = 1; | ||
| 2179 | int min_stripes = 1; | ||
| 2180 | int sub_stripes = 0; | ||
| 2181 | int looped = 0; | ||
| 2182 | int ret; | ||
| 2183 | int index; | ||
| 2184 | int stripe_len = 64 * 1024; | ||
| 2185 | 2282 | ||
| 2186 | if ((type & BTRFS_BLOCK_GROUP_RAID1) && | 2283 | static int __btrfs_calc_nstripes(struct btrfs_fs_devices *fs_devices, u64 type, |
| 2187 | (type & BTRFS_BLOCK_GROUP_DUP)) { | 2284 | int *num_stripes, int *min_stripes, |
| 2188 | WARN_ON(1); | 2285 | int *sub_stripes) |
| 2189 | type &= ~BTRFS_BLOCK_GROUP_DUP; | 2286 | { |
| 2190 | } | 2287 | *num_stripes = 1; |
| 2191 | if (list_empty(&fs_devices->alloc_list)) | 2288 | *min_stripes = 1; |
| 2192 | return -ENOSPC; | 2289 | *sub_stripes = 0; |
| 2193 | 2290 | ||
| 2194 | if (type & (BTRFS_BLOCK_GROUP_RAID0)) { | 2291 | if (type & (BTRFS_BLOCK_GROUP_RAID0)) { |
| 2195 | num_stripes = fs_devices->rw_devices; | 2292 | *num_stripes = fs_devices->rw_devices; |
| 2196 | min_stripes = 2; | 2293 | *min_stripes = 2; |
| 2197 | } | 2294 | } |
| 2198 | if (type & (BTRFS_BLOCK_GROUP_DUP)) { | 2295 | if (type & (BTRFS_BLOCK_GROUP_DUP)) { |
| 2199 | num_stripes = 2; | 2296 | *num_stripes = 2; |
| 2200 | min_stripes = 2; | 2297 | *min_stripes = 2; |
| 2201 | } | 2298 | } |
| 2202 | if (type & (BTRFS_BLOCK_GROUP_RAID1)) { | 2299 | if (type & (BTRFS_BLOCK_GROUP_RAID1)) { |
| 2203 | if (fs_devices->rw_devices < 2) | 2300 | if (fs_devices->rw_devices < 2) |
| 2204 | return -ENOSPC; | 2301 | return -ENOSPC; |
| 2205 | num_stripes = 2; | 2302 | *num_stripes = 2; |
| 2206 | min_stripes = 2; | 2303 | *min_stripes = 2; |
| 2207 | } | 2304 | } |
| 2208 | if (type & (BTRFS_BLOCK_GROUP_RAID10)) { | 2305 | if (type & (BTRFS_BLOCK_GROUP_RAID10)) { |
| 2209 | num_stripes = fs_devices->rw_devices; | 2306 | *num_stripes = fs_devices->rw_devices; |
| 2210 | if (num_stripes < 4) | 2307 | if (*num_stripes < 4) |
| 2211 | return -ENOSPC; | 2308 | return -ENOSPC; |
| 2212 | num_stripes &= ~(u32)1; | 2309 | *num_stripes &= ~(u32)1; |
| 2213 | sub_stripes = 2; | 2310 | *sub_stripes = 2; |
| 2214 | min_stripes = 4; | 2311 | *min_stripes = 4; |
| 2215 | } | 2312 | } |
| 2216 | 2313 | ||
| 2314 | return 0; | ||
| 2315 | } | ||
| 2316 | |||
| 2317 | static u64 __btrfs_calc_stripe_size(struct btrfs_fs_devices *fs_devices, | ||
| 2318 | u64 proposed_size, u64 type, | ||
| 2319 | int num_stripes, int small_stripe) | ||
| 2320 | { | ||
| 2321 | int min_stripe_size = 1 * 1024 * 1024; | ||
| 2322 | u64 calc_size = proposed_size; | ||
| 2323 | u64 max_chunk_size = calc_size; | ||
| 2324 | int ncopies = 1; | ||
| 2325 | |||
| 2326 | if (type & (BTRFS_BLOCK_GROUP_RAID1 | | ||
| 2327 | BTRFS_BLOCK_GROUP_DUP | | ||
| 2328 | BTRFS_BLOCK_GROUP_RAID10)) | ||
| 2329 | ncopies = 2; | ||
| 2330 | |||
| 2217 | if (type & BTRFS_BLOCK_GROUP_DATA) { | 2331 | if (type & BTRFS_BLOCK_GROUP_DATA) { |
| 2218 | max_chunk_size = 10 * calc_size; | 2332 | max_chunk_size = 10 * calc_size; |
| 2219 | min_stripe_size = 64 * 1024 * 1024; | 2333 | min_stripe_size = 64 * 1024 * 1024; |
| @@ -2230,51 +2344,209 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | |||
| 2230 | max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1), | 2344 | max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1), |
| 2231 | max_chunk_size); | 2345 | max_chunk_size); |
| 2232 | 2346 | ||
| 2233 | again: | 2347 | if (calc_size * num_stripes > max_chunk_size * ncopies) { |
| 2234 | max_avail = 0; | 2348 | calc_size = max_chunk_size * ncopies; |
| 2235 | if (!map || map->num_stripes != num_stripes) { | ||
| 2236 | kfree(map); | ||
| 2237 | map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS); | ||
| 2238 | if (!map) | ||
| 2239 | return -ENOMEM; | ||
| 2240 | map->num_stripes = num_stripes; | ||
| 2241 | } | ||
| 2242 | |||
| 2243 | if (calc_size * num_stripes > max_chunk_size) { | ||
| 2244 | calc_size = max_chunk_size; | ||
| 2245 | do_div(calc_size, num_stripes); | 2349 | do_div(calc_size, num_stripes); |
| 2246 | do_div(calc_size, stripe_len); | 2350 | do_div(calc_size, BTRFS_STRIPE_LEN); |
| 2247 | calc_size *= stripe_len; | 2351 | calc_size *= BTRFS_STRIPE_LEN; |
| 2248 | } | 2352 | } |
| 2249 | 2353 | ||
| 2250 | /* we don't want tiny stripes */ | 2354 | /* we don't want tiny stripes */ |
| 2251 | if (!looped) | 2355 | if (!small_stripe) |
| 2252 | calc_size = max_t(u64, min_stripe_size, calc_size); | 2356 | calc_size = max_t(u64, min_stripe_size, calc_size); |
| 2253 | 2357 | ||
| 2254 | /* | 2358 | /* |
| 2255 | * we're about to do_div by the stripe_len so lets make sure | 2359 | * we're about to do_div by the BTRFS_STRIPE_LEN so lets make sure |
| 2256 | * we end up with something bigger than a stripe | 2360 | * we end up with something bigger than a stripe |
| 2257 | */ | 2361 | */ |
| 2258 | calc_size = max_t(u64, calc_size, stripe_len * 4); | 2362 | calc_size = max_t(u64, calc_size, BTRFS_STRIPE_LEN); |
| 2363 | |||
| 2364 | do_div(calc_size, BTRFS_STRIPE_LEN); | ||
| 2365 | calc_size *= BTRFS_STRIPE_LEN; | ||
| 2366 | |||
| 2367 | return calc_size; | ||
| 2368 | } | ||
| 2369 | |||
| 2370 | static struct map_lookup *__shrink_map_lookup_stripes(struct map_lookup *map, | ||
| 2371 | int num_stripes) | ||
| 2372 | { | ||
| 2373 | struct map_lookup *new; | ||
| 2374 | size_t len = map_lookup_size(num_stripes); | ||
| 2375 | |||
| 2376 | BUG_ON(map->num_stripes < num_stripes); | ||
| 2377 | |||
| 2378 | if (map->num_stripes == num_stripes) | ||
| 2379 | return map; | ||
| 2380 | |||
| 2381 | new = kmalloc(len, GFP_NOFS); | ||
| 2382 | if (!new) { | ||
| 2383 | /* just change map->num_stripes */ | ||
| 2384 | map->num_stripes = num_stripes; | ||
| 2385 | return map; | ||
| 2386 | } | ||
| 2387 | |||
| 2388 | memcpy(new, map, len); | ||
| 2389 | new->num_stripes = num_stripes; | ||
| 2390 | kfree(map); | ||
| 2391 | return new; | ||
| 2392 | } | ||
| 2393 | |||
| 2394 | /* | ||
| 2395 | * helper to allocate device space from btrfs_device_info, in which we stored | ||
| 2396 | * max free space information of every device. It is used when we can not | ||
| 2397 | * allocate chunks by default size. | ||
| 2398 | * | ||
| 2399 | * By this helper, we can allocate a new chunk as larger as possible. | ||
| 2400 | */ | ||
| 2401 | static int __btrfs_alloc_tiny_space(struct btrfs_trans_handle *trans, | ||
| 2402 | struct btrfs_fs_devices *fs_devices, | ||
| 2403 | struct btrfs_device_info *devices, | ||
| 2404 | int nr_device, u64 type, | ||
| 2405 | struct map_lookup **map_lookup, | ||
| 2406 | int min_stripes, u64 *stripe_size) | ||
| 2407 | { | ||
| 2408 | int i, index, sort_again = 0; | ||
| 2409 | int min_devices = min_stripes; | ||
| 2410 | u64 max_avail, min_free; | ||
| 2411 | struct map_lookup *map = *map_lookup; | ||
| 2412 | int ret; | ||
| 2413 | |||
| 2414 | if (nr_device < min_stripes) | ||
| 2415 | return -ENOSPC; | ||
| 2416 | |||
| 2417 | btrfs_descending_sort_devices(devices, nr_device); | ||
| 2418 | |||
| 2419 | max_avail = devices[0].max_avail; | ||
| 2420 | if (!max_avail) | ||
| 2421 | return -ENOSPC; | ||
| 2422 | |||
| 2423 | for (i = 0; i < nr_device; i++) { | ||
| 2424 | /* | ||
| 2425 | * if dev_offset = 0, it means the free space of this device | ||
| 2426 | * is less than what we need, and we didn't search max avail | ||
| 2427 | * extent on this device, so do it now. | ||
| 2428 | */ | ||
| 2429 | if (!devices[i].dev_offset) { | ||
| 2430 | ret = find_free_dev_extent(trans, devices[i].dev, | ||
| 2431 | max_avail, | ||
| 2432 | &devices[i].dev_offset, | ||
| 2433 | &devices[i].max_avail); | ||
| 2434 | if (ret != 0 && ret != -ENOSPC) | ||
| 2435 | return ret; | ||
| 2436 | sort_again = 1; | ||
| 2437 | } | ||
| 2438 | } | ||
| 2439 | |||
| 2440 | /* we update the max avail free extent of each devices, sort again */ | ||
| 2441 | if (sort_again) | ||
| 2442 | btrfs_descending_sort_devices(devices, nr_device); | ||
| 2443 | |||
| 2444 | if (type & BTRFS_BLOCK_GROUP_DUP) | ||
| 2445 | min_devices = 1; | ||
| 2446 | |||
| 2447 | if (!devices[min_devices - 1].max_avail) | ||
| 2448 | return -ENOSPC; | ||
| 2449 | |||
| 2450 | max_avail = devices[min_devices - 1].max_avail; | ||
| 2451 | if (type & BTRFS_BLOCK_GROUP_DUP) | ||
| 2452 | do_div(max_avail, 2); | ||
| 2453 | |||
| 2454 | max_avail = __btrfs_calc_stripe_size(fs_devices, max_avail, type, | ||
| 2455 | min_stripes, 1); | ||
| 2456 | if (type & BTRFS_BLOCK_GROUP_DUP) | ||
| 2457 | min_free = max_avail * 2; | ||
| 2458 | else | ||
| 2459 | min_free = max_avail; | ||
| 2460 | |||
| 2461 | if (min_free > devices[min_devices - 1].max_avail) | ||
| 2462 | return -ENOSPC; | ||
| 2463 | |||
| 2464 | map = __shrink_map_lookup_stripes(map, min_stripes); | ||
| 2465 | *stripe_size = max_avail; | ||
| 2466 | |||
| 2467 | index = 0; | ||
| 2468 | for (i = 0; i < min_stripes; i++) { | ||
| 2469 | map->stripes[i].dev = devices[index].dev; | ||
| 2470 | map->stripes[i].physical = devices[index].dev_offset; | ||
| 2471 | if (type & BTRFS_BLOCK_GROUP_DUP) { | ||
| 2472 | i++; | ||
| 2473 | map->stripes[i].dev = devices[index].dev; | ||
| 2474 | map->stripes[i].physical = devices[index].dev_offset + | ||
| 2475 | max_avail; | ||
| 2476 | } | ||
| 2477 | index++; | ||
| 2478 | } | ||
| 2479 | *map_lookup = map; | ||
| 2480 | |||
| 2481 | return 0; | ||
| 2482 | } | ||
| 2259 | 2483 | ||
| 2260 | do_div(calc_size, stripe_len); | 2484 | static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, |
| 2261 | calc_size *= stripe_len; | 2485 | struct btrfs_root *extent_root, |
| 2486 | struct map_lookup **map_ret, | ||
| 2487 | u64 *num_bytes, u64 *stripe_size, | ||
| 2488 | u64 start, u64 type) | ||
| 2489 | { | ||
| 2490 | struct btrfs_fs_info *info = extent_root->fs_info; | ||
| 2491 | struct btrfs_device *device = NULL; | ||
| 2492 | struct btrfs_fs_devices *fs_devices = info->fs_devices; | ||
| 2493 | struct list_head *cur; | ||
| 2494 | struct map_lookup *map; | ||
| 2495 | struct extent_map_tree *em_tree; | ||
| 2496 | struct extent_map *em; | ||
| 2497 | struct btrfs_device_info *devices_info; | ||
| 2498 | struct list_head private_devs; | ||
| 2499 | u64 calc_size = 1024 * 1024 * 1024; | ||
| 2500 | u64 min_free; | ||
| 2501 | u64 avail; | ||
| 2502 | u64 dev_offset; | ||
| 2503 | int num_stripes; | ||
| 2504 | int min_stripes; | ||
| 2505 | int sub_stripes; | ||
| 2506 | int min_devices; /* the min number of devices we need */ | ||
| 2507 | int i; | ||
| 2508 | int ret; | ||
| 2509 | int index; | ||
| 2510 | |||
| 2511 | if ((type & BTRFS_BLOCK_GROUP_RAID1) && | ||
| 2512 | (type & BTRFS_BLOCK_GROUP_DUP)) { | ||
| 2513 | WARN_ON(1); | ||
| 2514 | type &= ~BTRFS_BLOCK_GROUP_DUP; | ||
| 2515 | } | ||
| 2516 | if (list_empty(&fs_devices->alloc_list)) | ||
| 2517 | return -ENOSPC; | ||
| 2518 | |||
| 2519 | ret = __btrfs_calc_nstripes(fs_devices, type, &num_stripes, | ||
| 2520 | &min_stripes, &sub_stripes); | ||
| 2521 | if (ret) | ||
| 2522 | return ret; | ||
| 2523 | |||
| 2524 | devices_info = kzalloc(sizeof(*devices_info) * fs_devices->rw_devices, | ||
| 2525 | GFP_NOFS); | ||
| 2526 | if (!devices_info) | ||
| 2527 | return -ENOMEM; | ||
| 2528 | |||
| 2529 | map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS); | ||
| 2530 | if (!map) { | ||
| 2531 | ret = -ENOMEM; | ||
| 2532 | goto error; | ||
| 2533 | } | ||
| 2534 | map->num_stripes = num_stripes; | ||
| 2262 | 2535 | ||
| 2263 | cur = fs_devices->alloc_list.next; | 2536 | cur = fs_devices->alloc_list.next; |
| 2264 | index = 0; | 2537 | index = 0; |
| 2538 | i = 0; | ||
| 2265 | 2539 | ||
| 2266 | if (type & BTRFS_BLOCK_GROUP_DUP) | 2540 | calc_size = __btrfs_calc_stripe_size(fs_devices, calc_size, type, |
| 2541 | num_stripes, 0); | ||
| 2542 | |||
| 2543 | if (type & BTRFS_BLOCK_GROUP_DUP) { | ||
| 2267 | min_free = calc_size * 2; | 2544 | min_free = calc_size * 2; |
| 2268 | else | 2545 | min_devices = 1; |
| 2546 | } else { | ||
| 2269 | min_free = calc_size; | 2547 | min_free = calc_size; |
| 2270 | 2548 | min_devices = min_stripes; | |
| 2271 | /* | 2549 | } |
| 2272 | * we add 1MB because we never use the first 1MB of the device, unless | ||
| 2273 | * we've looped, then we are likely allocating the maximum amount of | ||
| 2274 | * space left already | ||
| 2275 | */ | ||
| 2276 | if (!looped) | ||
| 2277 | min_free += 1024 * 1024; | ||
| 2278 | 2550 | ||
| 2279 | INIT_LIST_HEAD(&private_devs); | 2551 | INIT_LIST_HEAD(&private_devs); |
| 2280 | while (index < num_stripes) { | 2552 | while (index < num_stripes) { |
| @@ -2287,27 +2559,39 @@ again: | |||
| 2287 | cur = cur->next; | 2559 | cur = cur->next; |
| 2288 | 2560 | ||
| 2289 | if (device->in_fs_metadata && avail >= min_free) { | 2561 | if (device->in_fs_metadata && avail >= min_free) { |
| 2290 | ret = find_free_dev_extent(trans, device, | 2562 | ret = find_free_dev_extent(trans, device, min_free, |
| 2291 | min_free, &dev_offset, | 2563 | &devices_info[i].dev_offset, |
| 2292 | &max_avail); | 2564 | &devices_info[i].max_avail); |
| 2293 | if (ret == 0) { | 2565 | if (ret == 0) { |
| 2294 | list_move_tail(&device->dev_alloc_list, | 2566 | list_move_tail(&device->dev_alloc_list, |
| 2295 | &private_devs); | 2567 | &private_devs); |
| 2296 | map->stripes[index].dev = device; | 2568 | map->stripes[index].dev = device; |
| 2297 | map->stripes[index].physical = dev_offset; | 2569 | map->stripes[index].physical = |
| 2570 | devices_info[i].dev_offset; | ||
| 2298 | index++; | 2571 | index++; |
| 2299 | if (type & BTRFS_BLOCK_GROUP_DUP) { | 2572 | if (type & BTRFS_BLOCK_GROUP_DUP) { |
| 2300 | map->stripes[index].dev = device; | 2573 | map->stripes[index].dev = device; |
| 2301 | map->stripes[index].physical = | 2574 | map->stripes[index].physical = |
| 2302 | dev_offset + calc_size; | 2575 | devices_info[i].dev_offset + |
| 2576 | calc_size; | ||
| 2303 | index++; | 2577 | index++; |
| 2304 | } | 2578 | } |
| 2305 | } | 2579 | } else if (ret != -ENOSPC) |
| 2306 | } else if (device->in_fs_metadata && avail > max_avail) | 2580 | goto error; |
| 2307 | max_avail = avail; | 2581 | |
| 2582 | devices_info[i].dev = device; | ||
| 2583 | i++; | ||
| 2584 | } else if (device->in_fs_metadata && | ||
| 2585 | avail >= BTRFS_STRIPE_LEN) { | ||
| 2586 | devices_info[i].dev = device; | ||
| 2587 | devices_info[i].max_avail = avail; | ||
| 2588 | i++; | ||
| 2589 | } | ||
| 2590 | |||
| 2308 | if (cur == &fs_devices->alloc_list) | 2591 | if (cur == &fs_devices->alloc_list) |
| 2309 | break; | 2592 | break; |
| 2310 | } | 2593 | } |
| 2594 | |||
| 2311 | list_splice(&private_devs, &fs_devices->alloc_list); | 2595 | list_splice(&private_devs, &fs_devices->alloc_list); |
| 2312 | if (index < num_stripes) { | 2596 | if (index < num_stripes) { |
| 2313 | if (index >= min_stripes) { | 2597 | if (index >= min_stripes) { |
| @@ -2316,34 +2600,36 @@ again: | |||
| 2316 | num_stripes /= sub_stripes; | 2600 | num_stripes /= sub_stripes; |
| 2317 | num_stripes *= sub_stripes; | 2601 | num_stripes *= sub_stripes; |
| 2318 | } | 2602 | } |
| 2319 | looped = 1; | 2603 | |
| 2320 | goto again; | 2604 | map = __shrink_map_lookup_stripes(map, num_stripes); |
| 2321 | } | 2605 | } else if (i >= min_devices) { |
| 2322 | if (!looped && max_avail > 0) { | 2606 | ret = __btrfs_alloc_tiny_space(trans, fs_devices, |
| 2323 | looped = 1; | 2607 | devices_info, i, type, |
| 2324 | calc_size = max_avail; | 2608 | &map, min_stripes, |
| 2325 | goto again; | 2609 | &calc_size); |
| 2610 | if (ret) | ||
| 2611 | goto error; | ||
| 2612 | } else { | ||
| 2613 | ret = -ENOSPC; | ||
| 2614 | goto error; | ||
| 2326 | } | 2615 | } |
| 2327 | kfree(map); | ||
| 2328 | return -ENOSPC; | ||
| 2329 | } | 2616 | } |
| 2330 | map->sector_size = extent_root->sectorsize; | 2617 | map->sector_size = extent_root->sectorsize; |
| 2331 | map->stripe_len = stripe_len; | 2618 | map->stripe_len = BTRFS_STRIPE_LEN; |
| 2332 | map->io_align = stripe_len; | 2619 | map->io_align = BTRFS_STRIPE_LEN; |
| 2333 | map->io_width = stripe_len; | 2620 | map->io_width = BTRFS_STRIPE_LEN; |
| 2334 | map->type = type; | 2621 | map->type = type; |
| 2335 | map->num_stripes = num_stripes; | ||
| 2336 | map->sub_stripes = sub_stripes; | 2622 | map->sub_stripes = sub_stripes; |
| 2337 | 2623 | ||
| 2338 | *map_ret = map; | 2624 | *map_ret = map; |
| 2339 | *stripe_size = calc_size; | 2625 | *stripe_size = calc_size; |
| 2340 | *num_bytes = chunk_bytes_by_type(type, calc_size, | 2626 | *num_bytes = chunk_bytes_by_type(type, calc_size, |
| 2341 | num_stripes, sub_stripes); | 2627 | map->num_stripes, sub_stripes); |
| 2342 | 2628 | ||
| 2343 | em = alloc_extent_map(GFP_NOFS); | 2629 | em = alloc_extent_map(GFP_NOFS); |
| 2344 | if (!em) { | 2630 | if (!em) { |
| 2345 | kfree(map); | 2631 | ret = -ENOMEM; |
| 2346 | return -ENOMEM; | 2632 | goto error; |
| 2347 | } | 2633 | } |
| 2348 | em->bdev = (struct block_device *)map; | 2634 | em->bdev = (struct block_device *)map; |
| 2349 | em->start = start; | 2635 | em->start = start; |
| @@ -2376,7 +2662,13 @@ again: | |||
| 2376 | index++; | 2662 | index++; |
| 2377 | } | 2663 | } |
| 2378 | 2664 | ||
| 2665 | kfree(devices_info); | ||
| 2379 | return 0; | 2666 | return 0; |
| 2667 | |||
| 2668 | error: | ||
| 2669 | kfree(map); | ||
| 2670 | kfree(devices_info); | ||
| 2671 | return ret; | ||
| 2380 | } | 2672 | } |
| 2381 | 2673 | ||
| 2382 | static int __finish_chunk_alloc(struct btrfs_trans_handle *trans, | 2674 | static int __finish_chunk_alloc(struct btrfs_trans_handle *trans, |
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 1be781079450..7fb59d45fe8c 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h | |||
| @@ -20,8 +20,11 @@ | |||
| 20 | #define __BTRFS_VOLUMES_ | 20 | #define __BTRFS_VOLUMES_ |
| 21 | 21 | ||
| 22 | #include <linux/bio.h> | 22 | #include <linux/bio.h> |
| 23 | #include <linux/sort.h> | ||
| 23 | #include "async-thread.h" | 24 | #include "async-thread.h" |
| 24 | 25 | ||
| 26 | #define BTRFS_STRIPE_LEN (64 * 1024) | ||
| 27 | |||
| 25 | struct buffer_head; | 28 | struct buffer_head; |
| 26 | struct btrfs_pending_bios { | 29 | struct btrfs_pending_bios { |
| 27 | struct bio *head; | 30 | struct bio *head; |
| @@ -136,6 +139,30 @@ struct btrfs_multi_bio { | |||
| 136 | struct btrfs_bio_stripe stripes[]; | 139 | struct btrfs_bio_stripe stripes[]; |
| 137 | }; | 140 | }; |
| 138 | 141 | ||
| 142 | struct btrfs_device_info { | ||
| 143 | struct btrfs_device *dev; | ||
| 144 | u64 dev_offset; | ||
| 145 | u64 max_avail; | ||
| 146 | }; | ||
| 147 | |||
| 148 | /* Used to sort the devices by max_avail(descending sort) */ | ||
| 149 | int btrfs_cmp_device_free_bytes(const void *dev_info1, const void *dev_info2); | ||
| 150 | |||
| 151 | /* | ||
| 152 | * sort the devices by max_avail, in which max free extent size of each device | ||
| 153 | * is stored.(Descending Sort) | ||
| 154 | */ | ||
| 155 | static inline void btrfs_descending_sort_devices( | ||
| 156 | struct btrfs_device_info *devices, | ||
| 157 | size_t nr_devices) | ||
| 158 | { | ||
| 159 | sort(devices, nr_devices, sizeof(struct btrfs_device_info), | ||
| 160 | btrfs_cmp_device_free_bytes, NULL); | ||
| 161 | } | ||
| 162 | |||
| 163 | int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start, | ||
| 164 | u64 end, u64 *length); | ||
| 165 | |||
| 139 | #define btrfs_multi_bio_size(n) (sizeof(struct btrfs_multi_bio) + \ | 166 | #define btrfs_multi_bio_size(n) (sizeof(struct btrfs_multi_bio) + \ |
| 140 | (sizeof(struct btrfs_bio_stripe) * (n))) | 167 | (sizeof(struct btrfs_bio_stripe) * (n))) |
| 141 | 168 | ||
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 698fdd2c739c..a5776531dc2b 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c | |||
| @@ -316,6 +316,15 @@ ssize_t btrfs_getxattr(struct dentry *dentry, const char *name, | |||
| 316 | int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value, | 316 | int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value, |
| 317 | size_t size, int flags) | 317 | size_t size, int flags) |
| 318 | { | 318 | { |
| 319 | struct btrfs_root *root = BTRFS_I(dentry->d_inode)->root; | ||
| 320 | |||
| 321 | /* | ||
| 322 | * The permission on security.* and system.* is not checked | ||
| 323 | * in permission(). | ||
| 324 | */ | ||
| 325 | if (btrfs_root_readonly(root)) | ||
| 326 | return -EROFS; | ||
| 327 | |||
| 319 | /* | 328 | /* |
| 320 | * If this is a request for a synthetic attribute in the system.* | 329 | * If this is a request for a synthetic attribute in the system.* |
| 321 | * namespace use the generic infrastructure to resolve a handler | 330 | * namespace use the generic infrastructure to resolve a handler |
| @@ -336,6 +345,15 @@ int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value, | |||
| 336 | 345 | ||
| 337 | int btrfs_removexattr(struct dentry *dentry, const char *name) | 346 | int btrfs_removexattr(struct dentry *dentry, const char *name) |
| 338 | { | 347 | { |
| 348 | struct btrfs_root *root = BTRFS_I(dentry->d_inode)->root; | ||
| 349 | |||
| 350 | /* | ||
| 351 | * The permission on security.* and system.* is not checked | ||
| 352 | * in permission(). | ||
| 353 | */ | ||
| 354 | if (btrfs_root_readonly(root)) | ||
| 355 | return -EROFS; | ||
| 356 | |||
| 339 | /* | 357 | /* |
| 340 | * If this is a request for a synthetic attribute in the system.* | 358 | * If this is a request for a synthetic attribute in the system.* |
| 341 | * namespace use the generic infrastructure to resolve a handler | 359 | * namespace use the generic infrastructure to resolve a handler |
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c index b9cd5445f71c..f5ec2d44150d 100644 --- a/fs/btrfs/zlib.c +++ b/fs/btrfs/zlib.c | |||
| @@ -32,15 +32,6 @@ | |||
| 32 | #include <linux/bio.h> | 32 | #include <linux/bio.h> |
| 33 | #include "compression.h" | 33 | #include "compression.h" |
| 34 | 34 | ||
| 35 | /* Plan: call deflate() with avail_in == *sourcelen, | ||
| 36 | avail_out = *dstlen - 12 and flush == Z_FINISH. | ||
| 37 | If it doesn't manage to finish, call it again with | ||
| 38 | avail_in == 0 and avail_out set to the remaining 12 | ||
| 39 | bytes for it to clean up. | ||
| 40 | Q: Is 12 bytes sufficient? | ||
| 41 | */ | ||
| 42 | #define STREAM_END_SPACE 12 | ||
| 43 | |||
| 44 | struct workspace { | 35 | struct workspace { |
| 45 | z_stream inf_strm; | 36 | z_stream inf_strm; |
| 46 | z_stream def_strm; | 37 | z_stream def_strm; |
| @@ -48,152 +39,51 @@ struct workspace { | |||
| 48 | struct list_head list; | 39 | struct list_head list; |
| 49 | }; | 40 | }; |
| 50 | 41 | ||
| 51 | static LIST_HEAD(idle_workspace); | 42 | static void zlib_free_workspace(struct list_head *ws) |
| 52 | static DEFINE_SPINLOCK(workspace_lock); | 43 | { |
| 53 | static unsigned long num_workspace; | 44 | struct workspace *workspace = list_entry(ws, struct workspace, list); |
| 54 | static atomic_t alloc_workspace = ATOMIC_INIT(0); | ||
| 55 | static DECLARE_WAIT_QUEUE_HEAD(workspace_wait); | ||
| 56 | 45 | ||
| 57 | /* | 46 | vfree(workspace->def_strm.workspace); |
| 58 | * this finds an available zlib workspace or allocates a new one | 47 | vfree(workspace->inf_strm.workspace); |
| 59 | * NULL or an ERR_PTR is returned if things go bad. | 48 | kfree(workspace->buf); |
| 60 | */ | 49 | kfree(workspace); |
| 61 | static struct workspace *find_zlib_workspace(void) | 50 | } |
| 51 | |||
| 52 | static struct list_head *zlib_alloc_workspace(void) | ||
| 62 | { | 53 | { |
| 63 | struct workspace *workspace; | 54 | struct workspace *workspace; |
| 64 | int ret; | ||
| 65 | int cpus = num_online_cpus(); | ||
| 66 | |||
| 67 | again: | ||
| 68 | spin_lock(&workspace_lock); | ||
| 69 | if (!list_empty(&idle_workspace)) { | ||
| 70 | workspace = list_entry(idle_workspace.next, struct workspace, | ||
| 71 | list); | ||
| 72 | list_del(&workspace->list); | ||
| 73 | num_workspace--; | ||
| 74 | spin_unlock(&workspace_lock); | ||
| 75 | return workspace; | ||
| 76 | 55 | ||
| 77 | } | ||
| 78 | spin_unlock(&workspace_lock); | ||
| 79 | if (atomic_read(&alloc_workspace) > cpus) { | ||
| 80 | DEFINE_WAIT(wait); | ||
| 81 | prepare_to_wait(&workspace_wait, &wait, TASK_UNINTERRUPTIBLE); | ||
| 82 | if (atomic_read(&alloc_workspace) > cpus) | ||
| 83 | schedule(); | ||
| 84 | finish_wait(&workspace_wait, &wait); | ||
| 85 | goto again; | ||
| 86 | } | ||
| 87 | atomic_inc(&alloc_workspace); | ||
| 88 | workspace = kzalloc(sizeof(*workspace), GFP_NOFS); | 56 | workspace = kzalloc(sizeof(*workspace), GFP_NOFS); |
| 89 | if (!workspace) { | 57 | if (!workspace) |
| 90 | ret = -ENOMEM; | 58 | return ERR_PTR(-ENOMEM); |
| 91 | goto fail; | ||
| 92 | } | ||
| 93 | 59 | ||
| 94 | workspace->def_strm.workspace = vmalloc(zlib_deflate_workspacesize()); | 60 | workspace->def_strm.workspace = vmalloc(zlib_deflate_workspacesize()); |
| 95 | if (!workspace->def_strm.workspace) { | ||
| 96 | ret = -ENOMEM; | ||
| 97 | goto fail; | ||
| 98 | } | ||
| 99 | workspace->inf_strm.workspace = vmalloc(zlib_inflate_workspacesize()); | 61 | workspace->inf_strm.workspace = vmalloc(zlib_inflate_workspacesize()); |
| 100 | if (!workspace->inf_strm.workspace) { | ||
| 101 | ret = -ENOMEM; | ||
| 102 | goto fail_inflate; | ||
| 103 | } | ||
| 104 | workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS); | 62 | workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS); |
| 105 | if (!workspace->buf) { | 63 | if (!workspace->def_strm.workspace || |
| 106 | ret = -ENOMEM; | 64 | !workspace->inf_strm.workspace || !workspace->buf) |
| 107 | goto fail_kmalloc; | 65 | goto fail; |
| 108 | } | ||
| 109 | return workspace; | ||
| 110 | |||
| 111 | fail_kmalloc: | ||
| 112 | vfree(workspace->inf_strm.workspace); | ||
| 113 | fail_inflate: | ||
| 114 | vfree(workspace->def_strm.workspace); | ||
| 115 | fail: | ||
| 116 | kfree(workspace); | ||
| 117 | atomic_dec(&alloc_workspace); | ||
| 118 | wake_up(&workspace_wait); | ||
| 119 | return ERR_PTR(ret); | ||
| 120 | } | ||
| 121 | |||
| 122 | /* | ||
| 123 | * put a workspace struct back on the list or free it if we have enough | ||
| 124 | * idle ones sitting around | ||
| 125 | */ | ||
| 126 | static int free_workspace(struct workspace *workspace) | ||
| 127 | { | ||
| 128 | spin_lock(&workspace_lock); | ||
| 129 | if (num_workspace < num_online_cpus()) { | ||
| 130 | list_add_tail(&workspace->list, &idle_workspace); | ||
| 131 | num_workspace++; | ||
| 132 | spin_unlock(&workspace_lock); | ||
| 133 | if (waitqueue_active(&workspace_wait)) | ||
| 134 | wake_up(&workspace_wait); | ||
| 135 | return 0; | ||
| 136 | } | ||
| 137 | spin_unlock(&workspace_lock); | ||
| 138 | vfree(workspace->def_strm.workspace); | ||
| 139 | vfree(workspace->inf_strm.workspace); | ||
| 140 | kfree(workspace->buf); | ||
| 141 | kfree(workspace); | ||
| 142 | 66 | ||
| 143 | atomic_dec(&alloc_workspace); | 67 | INIT_LIST_HEAD(&workspace->list); |
| 144 | if (waitqueue_active(&workspace_wait)) | ||
| 145 | wake_up(&workspace_wait); | ||
| 146 | return 0; | ||
| 147 | } | ||
| 148 | 68 | ||
| 149 | /* | 69 | return &workspace->list; |
| 150 | * cleanup function for module exit | 70 | fail: |
| 151 | */ | 71 | zlib_free_workspace(&workspace->list); |
| 152 | static void free_workspaces(void) | 72 | return ERR_PTR(-ENOMEM); |
| 153 | { | ||
| 154 | struct workspace *workspace; | ||
| 155 | while (!list_empty(&idle_workspace)) { | ||
| 156 | workspace = list_entry(idle_workspace.next, struct workspace, | ||
| 157 | list); | ||
| 158 | list_del(&workspace->list); | ||
| 159 | vfree(workspace->def_strm.workspace); | ||
| 160 | vfree(workspace->inf_strm.workspace); | ||
| 161 | kfree(workspace->buf); | ||
| 162 | kfree(workspace); | ||
| 163 | atomic_dec(&alloc_workspace); | ||
| 164 | } | ||
| 165 | } | 73 | } |
| 166 | 74 | ||
| 167 | /* | 75 | static int zlib_compress_pages(struct list_head *ws, |
| 168 | * given an address space and start/len, compress the bytes. | 76 | struct address_space *mapping, |
| 169 | * | 77 | u64 start, unsigned long len, |
| 170 | * pages are allocated to hold the compressed result and stored | 78 | struct page **pages, |
| 171 | * in 'pages' | 79 | unsigned long nr_dest_pages, |
| 172 | * | 80 | unsigned long *out_pages, |
| 173 | * out_pages is used to return the number of pages allocated. There | 81 | unsigned long *total_in, |
| 174 | * may be pages allocated even if we return an error | 82 | unsigned long *total_out, |
| 175 | * | 83 | unsigned long max_out) |
| 176 | * total_in is used to return the number of bytes actually read. It | ||
| 177 | * may be smaller then len if we had to exit early because we | ||
| 178 | * ran out of room in the pages array or because we cross the | ||
| 179 | * max_out threshold. | ||
| 180 | * | ||
| 181 | * total_out is used to return the total number of compressed bytes | ||
| 182 | * | ||
| 183 | * max_out tells us the max number of bytes that we're allowed to | ||
| 184 | * stuff into pages | ||
| 185 | */ | ||
| 186 | int btrfs_zlib_compress_pages(struct address_space *mapping, | ||
| 187 | u64 start, unsigned long len, | ||
| 188 | struct page **pages, | ||
| 189 | unsigned long nr_dest_pages, | ||
| 190 | unsigned long *out_pages, | ||
| 191 | unsigned long *total_in, | ||
| 192 | unsigned long *total_out, | ||
| 193 | unsigned long max_out) | ||
| 194 | { | 84 | { |
| 85 | struct workspace *workspace = list_entry(ws, struct workspace, list); | ||
| 195 | int ret; | 86 | int ret; |
| 196 | struct workspace *workspace; | ||
| 197 | char *data_in; | 87 | char *data_in; |
| 198 | char *cpage_out; | 88 | char *cpage_out; |
| 199 | int nr_pages = 0; | 89 | int nr_pages = 0; |
| @@ -205,10 +95,6 @@ int btrfs_zlib_compress_pages(struct address_space *mapping, | |||
| 205 | *total_out = 0; | 95 | *total_out = 0; |
| 206 | *total_in = 0; | 96 | *total_in = 0; |
| 207 | 97 | ||
| 208 | workspace = find_zlib_workspace(); | ||
| 209 | if (IS_ERR(workspace)) | ||
| 210 | return -1; | ||
| 211 | |||
| 212 | if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) { | 98 | if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) { |
| 213 | printk(KERN_WARNING "deflateInit failed\n"); | 99 | printk(KERN_WARNING "deflateInit failed\n"); |
| 214 | ret = -1; | 100 | ret = -1; |
| @@ -222,6 +108,10 @@ int btrfs_zlib_compress_pages(struct address_space *mapping, | |||
| 222 | data_in = kmap(in_page); | 108 | data_in = kmap(in_page); |
| 223 | 109 | ||
| 224 | out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); | 110 | out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); |
| 111 | if (out_page == NULL) { | ||
| 112 | ret = -1; | ||
| 113 | goto out; | ||
| 114 | } | ||
| 225 | cpage_out = kmap(out_page); | 115 | cpage_out = kmap(out_page); |
| 226 | pages[0] = out_page; | 116 | pages[0] = out_page; |
| 227 | nr_pages = 1; | 117 | nr_pages = 1; |
| @@ -260,6 +150,10 @@ int btrfs_zlib_compress_pages(struct address_space *mapping, | |||
| 260 | goto out; | 150 | goto out; |
| 261 | } | 151 | } |
| 262 | out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); | 152 | out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); |
| 153 | if (out_page == NULL) { | ||
| 154 | ret = -1; | ||
| 155 | goto out; | ||
| 156 | } | ||
| 263 | cpage_out = kmap(out_page); | 157 | cpage_out = kmap(out_page); |
| 264 | pages[nr_pages] = out_page; | 158 | pages[nr_pages] = out_page; |
| 265 | nr_pages++; | 159 | nr_pages++; |
| @@ -314,55 +208,26 @@ out: | |||
| 314 | kunmap(in_page); | 208 | kunmap(in_page); |
| 315 | page_cache_release(in_page); | 209 | page_cache_release(in_page); |
| 316 | } | 210 | } |
| 317 | free_workspace(workspace); | ||
| 318 | return ret; | 211 | return ret; |
| 319 | } | 212 | } |
| 320 | 213 | ||
| 321 | /* | 214 | static int zlib_decompress_biovec(struct list_head *ws, struct page **pages_in, |
| 322 | * pages_in is an array of pages with compressed data. | 215 | u64 disk_start, |
| 323 | * | 216 | struct bio_vec *bvec, |
| 324 | * disk_start is the starting logical offset of this array in the file | 217 | int vcnt, |
| 325 | * | 218 | size_t srclen) |
| 326 | * bvec is a bio_vec of pages from the file that we want to decompress into | ||
| 327 | * | ||
| 328 | * vcnt is the count of pages in the biovec | ||
| 329 | * | ||
| 330 | * srclen is the number of bytes in pages_in | ||
| 331 | * | ||
| 332 | * The basic idea is that we have a bio that was created by readpages. | ||
| 333 | * The pages in the bio are for the uncompressed data, and they may not | ||
| 334 | * be contiguous. They all correspond to the range of bytes covered by | ||
| 335 | * the compressed extent. | ||
| 336 | */ | ||
| 337 | int btrfs_zlib_decompress_biovec(struct page **pages_in, | ||
| 338 | u64 disk_start, | ||
| 339 | struct bio_vec *bvec, | ||
| 340 | int vcnt, | ||
| 341 | size_t srclen) | ||
| 342 | { | 219 | { |
| 343 | int ret = 0; | 220 | struct workspace *workspace = list_entry(ws, struct workspace, list); |
| 221 | int ret = 0, ret2; | ||
| 344 | int wbits = MAX_WBITS; | 222 | int wbits = MAX_WBITS; |
| 345 | struct workspace *workspace; | ||
| 346 | char *data_in; | 223 | char *data_in; |
| 347 | size_t total_out = 0; | 224 | size_t total_out = 0; |
| 348 | unsigned long page_bytes_left; | ||
| 349 | unsigned long page_in_index = 0; | 225 | unsigned long page_in_index = 0; |
| 350 | unsigned long page_out_index = 0; | 226 | unsigned long page_out_index = 0; |
| 351 | struct page *page_out; | ||
| 352 | unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) / | 227 | unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) / |
| 353 | PAGE_CACHE_SIZE; | 228 | PAGE_CACHE_SIZE; |
| 354 | unsigned long buf_start; | 229 | unsigned long buf_start; |
| 355 | unsigned long buf_offset; | ||
| 356 | unsigned long bytes; | ||
| 357 | unsigned long working_bytes; | ||
| 358 | unsigned long pg_offset; | 230 | unsigned long pg_offset; |
| 359 | unsigned long start_byte; | ||
| 360 | unsigned long current_buf_start; | ||
| 361 | char *kaddr; | ||
| 362 | |||
| 363 | workspace = find_zlib_workspace(); | ||
| 364 | if (IS_ERR(workspace)) | ||
| 365 | return -ENOMEM; | ||
| 366 | 231 | ||
| 367 | data_in = kmap(pages_in[page_in_index]); | 232 | data_in = kmap(pages_in[page_in_index]); |
| 368 | workspace->inf_strm.next_in = data_in; | 233 | workspace->inf_strm.next_in = data_in; |
| @@ -372,8 +237,6 @@ int btrfs_zlib_decompress_biovec(struct page **pages_in, | |||
| 372 | workspace->inf_strm.total_out = 0; | 237 | workspace->inf_strm.total_out = 0; |
| 373 | workspace->inf_strm.next_out = workspace->buf; | 238 | workspace->inf_strm.next_out = workspace->buf; |
| 374 | workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; | 239 | workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; |
| 375 | page_out = bvec[page_out_index].bv_page; | ||
| 376 | page_bytes_left = PAGE_CACHE_SIZE; | ||
| 377 | pg_offset = 0; | 240 | pg_offset = 0; |
| 378 | 241 | ||
| 379 | /* If it's deflate, and it's got no preset dictionary, then | 242 | /* If it's deflate, and it's got no preset dictionary, then |
| @@ -389,107 +252,29 @@ int btrfs_zlib_decompress_biovec(struct page **pages_in, | |||
| 389 | 252 | ||
| 390 | if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { | 253 | if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { |
| 391 | printk(KERN_WARNING "inflateInit failed\n"); | 254 | printk(KERN_WARNING "inflateInit failed\n"); |
| 392 | ret = -1; | 255 | return -1; |
| 393 | goto out; | ||
| 394 | } | 256 | } |
| 395 | while (workspace->inf_strm.total_in < srclen) { | 257 | while (workspace->inf_strm.total_in < srclen) { |
| 396 | ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH); | 258 | ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH); |
| 397 | if (ret != Z_OK && ret != Z_STREAM_END) | 259 | if (ret != Z_OK && ret != Z_STREAM_END) |
| 398 | break; | 260 | break; |
| 399 | /* | ||
| 400 | * buf start is the byte offset we're of the start of | ||
| 401 | * our workspace buffer | ||
| 402 | */ | ||
| 403 | buf_start = total_out; | ||
| 404 | 261 | ||
| 405 | /* total_out is the last byte of the workspace buffer */ | 262 | buf_start = total_out; |
| 406 | total_out = workspace->inf_strm.total_out; | 263 | total_out = workspace->inf_strm.total_out; |
| 407 | 264 | ||
| 408 | working_bytes = total_out - buf_start; | 265 | /* we didn't make progress in this inflate call, we're done */ |
| 409 | 266 | if (buf_start == total_out) | |
| 410 | /* | ||
| 411 | * start byte is the first byte of the page we're currently | ||
| 412 | * copying into relative to the start of the compressed data. | ||
| 413 | */ | ||
| 414 | start_byte = page_offset(page_out) - disk_start; | ||
| 415 | |||
| 416 | if (working_bytes == 0) { | ||
| 417 | /* we didn't make progress in this inflate | ||
| 418 | * call, we're done | ||
| 419 | */ | ||
| 420 | if (ret != Z_STREAM_END) | ||
| 421 | ret = -1; | ||
| 422 | break; | 267 | break; |
| 423 | } | ||
| 424 | 268 | ||
| 425 | /* we haven't yet hit data corresponding to this page */ | 269 | ret2 = btrfs_decompress_buf2page(workspace->buf, buf_start, |
| 426 | if (total_out <= start_byte) | 270 | total_out, disk_start, |
| 427 | goto next; | 271 | bvec, vcnt, |
| 428 | 272 | &page_out_index, &pg_offset); | |
| 429 | /* | 273 | if (ret2 == 0) { |
| 430 | * the start of the data we care about is offset into | 274 | ret = 0; |
| 431 | * the middle of our working buffer | 275 | goto done; |
| 432 | */ | ||
| 433 | if (total_out > start_byte && buf_start < start_byte) { | ||
| 434 | buf_offset = start_byte - buf_start; | ||
| 435 | working_bytes -= buf_offset; | ||
| 436 | } else { | ||
| 437 | buf_offset = 0; | ||
| 438 | } | ||
| 439 | current_buf_start = buf_start; | ||
| 440 | |||
| 441 | /* copy bytes from the working buffer into the pages */ | ||
| 442 | while (working_bytes > 0) { | ||
| 443 | bytes = min(PAGE_CACHE_SIZE - pg_offset, | ||
| 444 | PAGE_CACHE_SIZE - buf_offset); | ||
| 445 | bytes = min(bytes, working_bytes); | ||
| 446 | kaddr = kmap_atomic(page_out, KM_USER0); | ||
| 447 | memcpy(kaddr + pg_offset, workspace->buf + buf_offset, | ||
| 448 | bytes); | ||
| 449 | kunmap_atomic(kaddr, KM_USER0); | ||
| 450 | flush_dcache_page(page_out); | ||
| 451 | |||
| 452 | pg_offset += bytes; | ||
| 453 | page_bytes_left -= bytes; | ||
| 454 | buf_offset += bytes; | ||
| 455 | working_bytes -= bytes; | ||
| 456 | current_buf_start += bytes; | ||
| 457 | |||
| 458 | /* check if we need to pick another page */ | ||
| 459 | if (page_bytes_left == 0) { | ||
| 460 | page_out_index++; | ||
| 461 | if (page_out_index >= vcnt) { | ||
| 462 | ret = 0; | ||
| 463 | goto done; | ||
| 464 | } | ||
| 465 | |||
| 466 | page_out = bvec[page_out_index].bv_page; | ||
| 467 | pg_offset = 0; | ||
| 468 | page_bytes_left = PAGE_CACHE_SIZE; | ||
| 469 | start_byte = page_offset(page_out) - disk_start; | ||
| 470 | |||
| 471 | /* | ||
| 472 | * make sure our new page is covered by this | ||
| 473 | * working buffer | ||
| 474 | */ | ||
| 475 | if (total_out <= start_byte) | ||
| 476 | goto next; | ||
| 477 | |||
| 478 | /* the next page in the biovec might not | ||
| 479 | * be adjacent to the last page, but it | ||
| 480 | * might still be found inside this working | ||
| 481 | * buffer. bump our offset pointer | ||
| 482 | */ | ||
| 483 | if (total_out > start_byte && | ||
| 484 | current_buf_start < start_byte) { | ||
| 485 | buf_offset = start_byte - buf_start; | ||
| 486 | working_bytes = total_out - start_byte; | ||
| 487 | current_buf_start = buf_start + | ||
| 488 | buf_offset; | ||
| 489 | } | ||
| 490 | } | ||
| 491 | } | 276 | } |
| 492 | next: | 277 | |
| 493 | workspace->inf_strm.next_out = workspace->buf; | 278 | workspace->inf_strm.next_out = workspace->buf; |
| 494 | workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; | 279 | workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; |
| 495 | 280 | ||
| @@ -516,35 +301,21 @@ done: | |||
| 516 | zlib_inflateEnd(&workspace->inf_strm); | 301 | zlib_inflateEnd(&workspace->inf_strm); |
| 517 | if (data_in) | 302 | if (data_in) |
| 518 | kunmap(pages_in[page_in_index]); | 303 | kunmap(pages_in[page_in_index]); |
| 519 | out: | ||
| 520 | free_workspace(workspace); | ||
| 521 | return ret; | 304 | return ret; |
| 522 | } | 305 | } |
| 523 | 306 | ||
| 524 | /* | 307 | static int zlib_decompress(struct list_head *ws, unsigned char *data_in, |
| 525 | * a less complex decompression routine. Our compressed data fits in a | 308 | struct page *dest_page, |
| 526 | * single page, and we want to read a single page out of it. | 309 | unsigned long start_byte, |
| 527 | * start_byte tells us the offset into the compressed data we're interested in | 310 | size_t srclen, size_t destlen) |
| 528 | */ | ||
| 529 | int btrfs_zlib_decompress(unsigned char *data_in, | ||
| 530 | struct page *dest_page, | ||
| 531 | unsigned long start_byte, | ||
| 532 | size_t srclen, size_t destlen) | ||
| 533 | { | 311 | { |
| 312 | struct workspace *workspace = list_entry(ws, struct workspace, list); | ||
| 534 | int ret = 0; | 313 | int ret = 0; |
| 535 | int wbits = MAX_WBITS; | 314 | int wbits = MAX_WBITS; |
| 536 | struct workspace *workspace; | ||
| 537 | unsigned long bytes_left = destlen; | 315 | unsigned long bytes_left = destlen; |
| 538 | unsigned long total_out = 0; | 316 | unsigned long total_out = 0; |
| 539 | char *kaddr; | 317 | char *kaddr; |
| 540 | 318 | ||
| 541 | if (destlen > PAGE_CACHE_SIZE) | ||
| 542 | return -ENOMEM; | ||
| 543 | |||
| 544 | workspace = find_zlib_workspace(); | ||
| 545 | if (IS_ERR(workspace)) | ||
| 546 | return -ENOMEM; | ||
| 547 | |||
| 548 | workspace->inf_strm.next_in = data_in; | 319 | workspace->inf_strm.next_in = data_in; |
| 549 | workspace->inf_strm.avail_in = srclen; | 320 | workspace->inf_strm.avail_in = srclen; |
| 550 | workspace->inf_strm.total_in = 0; | 321 | workspace->inf_strm.total_in = 0; |
| @@ -565,8 +336,7 @@ int btrfs_zlib_decompress(unsigned char *data_in, | |||
| 565 | 336 | ||
| 566 | if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { | 337 | if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { |
| 567 | printk(KERN_WARNING "inflateInit failed\n"); | 338 | printk(KERN_WARNING "inflateInit failed\n"); |
| 568 | ret = -1; | 339 | return -1; |
| 569 | goto out; | ||
| 570 | } | 340 | } |
| 571 | 341 | ||
| 572 | while (bytes_left > 0) { | 342 | while (bytes_left > 0) { |
| @@ -616,12 +386,13 @@ next: | |||
| 616 | ret = 0; | 386 | ret = 0; |
| 617 | 387 | ||
| 618 | zlib_inflateEnd(&workspace->inf_strm); | 388 | zlib_inflateEnd(&workspace->inf_strm); |
| 619 | out: | ||
| 620 | free_workspace(workspace); | ||
| 621 | return ret; | 389 | return ret; |
| 622 | } | 390 | } |
| 623 | 391 | ||
| 624 | void btrfs_zlib_exit(void) | 392 | struct btrfs_compress_op btrfs_zlib_compress = { |
| 625 | { | 393 | .alloc_workspace = zlib_alloc_workspace, |
| 626 | free_workspaces(); | 394 | .free_workspace = zlib_free_workspace, |
| 627 | } | 395 | .compress_pages = zlib_compress_pages, |
| 396 | .decompress_biovec = zlib_decompress_biovec, | ||
| 397 | .decompress = zlib_decompress, | ||
| 398 | }; | ||
