author    Linus Torvalds <torvalds@linux-foundation.org>  2016-05-21 13:49:22 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2016-05-21 13:49:22 -0400
commit    07be1337b9e8bfcd855c6e9175b5066a30ac609b (patch)
tree      e40ad01dc89f6eb17d461939b809fea3387fc2a5 /fs/btrfs/compression.c
parent    63d222b9d277c4d7bf08afd1631a7f8e327a825c (diff)
parent    c315ef8d9db7f1a0ebd023a395ebdfde1c68057e (diff)
Merge branch 'for-linus-4.7' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Pull btrfs updates from Chris Mason:
 "This has our merge window series of cleanups and fixes. These target a
  wide range of issues, but do include some important fixes for qgroups,
  O_DIRECT, and fsync handling.

  Jeff Mahoney moved around a few definitions to make them easier for
  userland to consume.

  Also whiteout support is included now that issues with overlayfs have
  been cleared up.

  I have one more fix pending for page faults during btrfs_copy_from_user,
  but I wanted to get this bulk out the door first"

* 'for-linus-4.7' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (90 commits)
  btrfs: fix memory leak during RAID 5/6 device replacement
  Btrfs: add semaphore to synchronize direct IO writes with fsync
  Btrfs: fix race between block group relocation and nocow writes
  Btrfs: fix race between fsync and direct IO writes for prealloc extents
  Btrfs: fix number of transaction units for renames with whiteout
  Btrfs: pin logs earlier when doing a rename exchange operation
  Btrfs: unpin logs if rename exchange operation fails
  Btrfs: fix inode leak on failure to setup whiteout inode in rename
  btrfs: add support for RENAME_EXCHANGE and RENAME_WHITEOUT
  Btrfs: pin log earlier when renaming
  Btrfs: unpin log if rename operation fails
  Btrfs: don't do unnecessary delalloc flushes when relocating
  Btrfs: don't wait for unrelated IO to finish before relocation
  Btrfs: fix empty symlink after creating symlink and fsync parent dir
  Btrfs: fix for incorrect directory entries after fsync log replay
  btrfs: build fixup for qgroup_account_snapshot
  btrfs: qgroup: Fix qgroup accounting when creating snapshot
  Btrfs: fix fspath error deallocation
  btrfs: make find_workspace warn if there are no workspaces
  btrfs: make find_workspace always succeed
  ...
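Two of the listed commits, "make find_workspace warn if there are no workspaces" and
"make find_workspace always succeed", change how compression workspaces are handed out,
and the diff below shows the result for fs/btrfs/compression.c: one workspace is
preallocated per compression type so acquisition can always make forward progress,
extra workspaces are allocated on demand up to roughly the number of online CPUs, and
callers wait for a free workspace instead of handling allocation failure. The following
is a minimal userspace sketch of that pattern, assuming POSIX threads; the names
(struct ws_pool, ws_get, ws_put) are hypothetical and are not the kernel API.

	/* Sketch only: models the btrfs workspace pool pattern in userspace. */
	#include <pthread.h>
	#include <stdlib.h>
	#include <unistd.h>

	struct ws {
		struct ws *next;	/* singly linked free list */
		char buf[4096];		/* per-workspace scratch memory */
	};

	struct ws_pool {
		pthread_mutex_t lock;	/* stands in for ws_lock */
		pthread_cond_t wait;	/* stands in for the wait queue */
		struct ws *idle;	/* list of free workspaces */
		int free_ws;		/* entries on 'idle' */
		int total_ws;		/* all allocated workspaces */
		int limit;		/* soft cap: number of online CPUs */
	};

	void ws_pool_init(struct ws_pool *p)
	{
		pthread_mutex_init(&p->lock, NULL);
		pthread_cond_init(&p->wait, NULL);
		p->idle = NULL;
		p->free_ws = 0;
		p->total_ws = 0;
		p->limit = (int)sysconf(_SC_NPROCESSORS_ONLN);

		/* Preallocate one workspace so ws_get() can always make progress. */
		p->idle = calloc(1, sizeof(*p->idle));
		if (p->idle) {
			p->free_ws = 1;
			p->total_ws = 1;
		}
	}

	/* Never fails: reuses an idle workspace, allocates one, or waits. */
	struct ws *ws_get(struct ws_pool *p)
	{
		struct ws *w;

		pthread_mutex_lock(&p->lock);
		for (;;) {
			if (p->idle) {			/* reuse an idle workspace */
				w = p->idle;
				p->idle = w->next;
				p->free_ws--;
				break;
			}
			if (p->total_ws >= p->limit) {	/* enough exist: wait for one */
				pthread_cond_wait(&p->wait, &p->lock);
				continue;
			}
			p->total_ws++;			/* allowed to allocate another */
			pthread_mutex_unlock(&p->lock);
			w = calloc(1, sizeof(*w));
			if (w)
				return w;
			pthread_mutex_lock(&p->lock);
			p->total_ws--;			/* allocation failed: retry instead of
							 * reporting an error (the kernel code
							 * adds a rate-limited warning here) */
		}
		pthread_mutex_unlock(&p->lock);
		return w;
	}

	void ws_put(struct ws_pool *p, struct ws *w)
	{
		pthread_mutex_lock(&p->lock);
		if (p->free_ws < p->limit) {		/* keep it around for reuse */
			w->next = p->idle;
			p->idle = w;
			p->free_ws++;
			w = NULL;
		} else {
			p->total_ws--;			/* over the cap: drop it */
		}
		pthread_mutex_unlock(&p->lock);
		free(w);				/* no-op if we kept it */
		pthread_cond_signal(&p->wait);		/* wake one waiter */
	}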
Diffstat (limited to 'fs/btrfs/compression.c')
-rw-r--r--  fs/btrfs/compression.c | 85
1 file changed, 61 insertions(+), 24 deletions(-)
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index ff61a41ac90b..658c39b70fba 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -743,8 +743,11 @@ out:
 static struct {
 	struct list_head idle_ws;
 	spinlock_t ws_lock;
-	int num_ws;
-	atomic_t alloc_ws;
+	/* Number of free workspaces */
+	int free_ws;
+	/* Total number of allocated workspaces */
+	atomic_t total_ws;
+	/* Waiters for a free workspace */
 	wait_queue_head_t ws_wait;
 } btrfs_comp_ws[BTRFS_COMPRESS_TYPES];
 
@@ -758,16 +761,34 @@ void __init btrfs_init_compress(void)
 	int i;
 
 	for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) {
+		struct list_head *workspace;
+
 		INIT_LIST_HEAD(&btrfs_comp_ws[i].idle_ws);
 		spin_lock_init(&btrfs_comp_ws[i].ws_lock);
-		atomic_set(&btrfs_comp_ws[i].alloc_ws, 0);
+		atomic_set(&btrfs_comp_ws[i].total_ws, 0);
 		init_waitqueue_head(&btrfs_comp_ws[i].ws_wait);
+
+		/*
+		 * Preallocate one workspace for each compression type so
+		 * we can guarantee forward progress in the worst case
+		 */
+		workspace = btrfs_compress_op[i]->alloc_workspace();
+		if (IS_ERR(workspace)) {
+			printk(KERN_WARNING
+	"BTRFS: cannot preallocate compression workspace, will try later");
+		} else {
+			atomic_set(&btrfs_comp_ws[i].total_ws, 1);
+			btrfs_comp_ws[i].free_ws = 1;
+			list_add(workspace, &btrfs_comp_ws[i].idle_ws);
+		}
 	}
 }
 
 /*
- * this finds an available workspace or allocates a new one
- * ERR_PTR is returned if things go bad.
+ * This finds an available workspace or allocates a new one.
+ * If it's not possible to allocate a new one, waits until there's one.
+ * Preallocation makes a forward progress guarantees and we do not return
+ * errors.
  */
 static struct list_head *find_workspace(int type)
 {
@@ -777,36 +798,58 @@ static struct list_head *find_workspace(int type)
 
 	struct list_head *idle_ws	= &btrfs_comp_ws[idx].idle_ws;
 	spinlock_t *ws_lock		= &btrfs_comp_ws[idx].ws_lock;
-	atomic_t *alloc_ws		= &btrfs_comp_ws[idx].alloc_ws;
+	atomic_t *total_ws		= &btrfs_comp_ws[idx].total_ws;
 	wait_queue_head_t *ws_wait	= &btrfs_comp_ws[idx].ws_wait;
-	int *num_ws			= &btrfs_comp_ws[idx].num_ws;
+	int *free_ws			= &btrfs_comp_ws[idx].free_ws;
 again:
 	spin_lock(ws_lock);
 	if (!list_empty(idle_ws)) {
 		workspace = idle_ws->next;
 		list_del(workspace);
-		(*num_ws)--;
+		(*free_ws)--;
 		spin_unlock(ws_lock);
 		return workspace;
 
 	}
-	if (atomic_read(alloc_ws) > cpus) {
+	if (atomic_read(total_ws) > cpus) {
 		DEFINE_WAIT(wait);
 
 		spin_unlock(ws_lock);
 		prepare_to_wait(ws_wait, &wait, TASK_UNINTERRUPTIBLE);
-		if (atomic_read(alloc_ws) > cpus && !*num_ws)
+		if (atomic_read(total_ws) > cpus && !*free_ws)
 			schedule();
 		finish_wait(ws_wait, &wait);
 		goto again;
 	}
-	atomic_inc(alloc_ws);
+	atomic_inc(total_ws);
 	spin_unlock(ws_lock);
 
 	workspace = btrfs_compress_op[idx]->alloc_workspace();
 	if (IS_ERR(workspace)) {
-		atomic_dec(alloc_ws);
+		atomic_dec(total_ws);
 		wake_up(ws_wait);
+
+		/*
+		 * Do not return the error but go back to waiting. There's a
+		 * workspace preallocated for each type and the compression
+		 * time is bounded so we get to a workspace eventually. This
+		 * makes our caller's life easier.
+		 *
+		 * To prevent silent and low-probability deadlocks (when the
+		 * initial preallocation fails), check if there are any
+		 * workspaces at all.
+		 */
+		if (atomic_read(total_ws) == 0) {
+			static DEFINE_RATELIMIT_STATE(_rs,
+					/* once per minute */ 60 * HZ,
+					/* no burst */ 1);
+
+			if (__ratelimit(&_rs)) {
+				printk(KERN_WARNING
+		    "no compression workspaces, low memory, retrying");
+			}
+		}
+		goto again;
 	}
 	return workspace;
 }
@@ -820,21 +863,21 @@ static void free_workspace(int type, struct list_head *workspace)
 	int idx = type - 1;
 	struct list_head *idle_ws	= &btrfs_comp_ws[idx].idle_ws;
 	spinlock_t *ws_lock		= &btrfs_comp_ws[idx].ws_lock;
-	atomic_t *alloc_ws		= &btrfs_comp_ws[idx].alloc_ws;
+	atomic_t *total_ws		= &btrfs_comp_ws[idx].total_ws;
 	wait_queue_head_t *ws_wait	= &btrfs_comp_ws[idx].ws_wait;
-	int *num_ws			= &btrfs_comp_ws[idx].num_ws;
+	int *free_ws			= &btrfs_comp_ws[idx].free_ws;
 
 	spin_lock(ws_lock);
-	if (*num_ws < num_online_cpus()) {
+	if (*free_ws < num_online_cpus()) {
 		list_add(workspace, idle_ws);
-		(*num_ws)++;
+		(*free_ws)++;
 		spin_unlock(ws_lock);
 		goto wake;
 	}
 	spin_unlock(ws_lock);
 
 	btrfs_compress_op[idx]->free_workspace(workspace);
-	atomic_dec(alloc_ws);
+	atomic_dec(total_ws);
 wake:
 	/*
 	 * Make sure counter is updated before we wake up waiters.
@@ -857,7 +900,7 @@ static void free_workspaces(void)
 			workspace = btrfs_comp_ws[i].idle_ws.next;
 			list_del(workspace);
 			btrfs_compress_op[i]->free_workspace(workspace);
-			atomic_dec(&btrfs_comp_ws[i].alloc_ws);
+			atomic_dec(&btrfs_comp_ws[i].total_ws);
 		}
 	}
 }
@@ -894,8 +937,6 @@ int btrfs_compress_pages(int type, struct address_space *mapping,
 	int ret;
 
 	workspace = find_workspace(type);
-	if (IS_ERR(workspace))
-		return PTR_ERR(workspace);
 
 	ret = btrfs_compress_op[type-1]->compress_pages(workspace, mapping,
 						      start, len, pages,
@@ -930,8 +971,6 @@ static int btrfs_decompress_biovec(int type, struct page **pages_in,
 	int ret;
 
 	workspace = find_workspace(type);
-	if (IS_ERR(workspace))
-		return PTR_ERR(workspace);
 
 	ret = btrfs_compress_op[type-1]->decompress_biovec(workspace, pages_in,
 							 disk_start,
@@ -952,8 +991,6 @@ int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page,
 	int ret;
 
 	workspace = find_workspace(type);
-	if (IS_ERR(workspace))
-		return PTR_ERR(workspace);
 
 	ret = btrfs_compress_op[type-1]->decompress(workspace, data_in,
 						  dest_page, start_byte,
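As the last three hunks show, once the workspace getter can no longer fail, the
IS_ERR()/PTR_ERR() checks in btrfs_compress_pages(), btrfs_decompress_biovec() and
btrfs_decompress() become dead code and are simply removed. A caller-side sketch in
terms of the hypothetical pool above (compress_one and its body are illustrative only):

	/*
	 * Hypothetical caller, mirroring how the btrfs_compress_pages() path is
	 * simplified by this change: ws_get() cannot fail, so no error path remains.
	 */
	void compress_one(struct ws_pool *pool)
	{
		struct ws *w = ws_get(pool);	/* always returns a workspace */

		/* ... use w->buf as scratch space for the actual compression ... */

		ws_put(pool, w);		/* return it and wake up a waiter */
	}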