author    Linus Torvalds <torvalds@linux-foundation.org>  2016-05-21 13:49:22 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2016-05-21 13:49:22 -0400
commit    07be1337b9e8bfcd855c6e9175b5066a30ac609b (patch)
tree      e40ad01dc89f6eb17d461939b809fea3387fc2a5 /fs/btrfs/compression.c
parent    63d222b9d277c4d7bf08afd1631a7f8e327a825c (diff)
parent    c315ef8d9db7f1a0ebd023a395ebdfde1c68057e (diff)
Merge branch 'for-linus-4.7' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Pull btrfs updates from Chris Mason:
 "This has our merge window series of cleanups and fixes. These target a
  wide range of issues, but do include some important fixes for qgroups,
  O_DIRECT, and fsync handling.

  Jeff Mahoney moved around a few definitions to make them easier for
  userland to consume.

  Also whiteout support is included now that issues with overlayfs have
  been cleared up.

  I have one more fix pending for page faults during btrfs_copy_from_user,
  but I wanted to get this bulk out the door first"

* 'for-linus-4.7' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (90 commits)
  btrfs: fix memory leak during RAID 5/6 device replacement
  Btrfs: add semaphore to synchronize direct IO writes with fsync
  Btrfs: fix race between block group relocation and nocow writes
  Btrfs: fix race between fsync and direct IO writes for prealloc extents
  Btrfs: fix number of transaction units for renames with whiteout
  Btrfs: pin logs earlier when doing a rename exchange operation
  Btrfs: unpin logs if rename exchange operation fails
  Btrfs: fix inode leak on failure to setup whiteout inode in rename
  btrfs: add support for RENAME_EXCHANGE and RENAME_WHITEOUT
  Btrfs: pin log earlier when renaming
  Btrfs: unpin log if rename operation fails
  Btrfs: don't do unnecessary delalloc flushes when relocating
  Btrfs: don't wait for unrelated IO to finish before relocation
  Btrfs: fix empty symlink after creating symlink and fsync parent dir
  Btrfs: fix for incorrect directory entries after fsync log replay
  btrfs: build fixup for qgroup_account_snapshot
  btrfs: qgroup: Fix qgroup accounting when creating snapshot
  Btrfs: fix fspath error deallocation
  btrfs: make find_workspace warn if there are no workspaces
  btrfs: make find_workspace always succeed
  ...
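Two of the listed commits, "make find_workspace warn if there are no workspaces" and
"make find_workspace always succeed", change how compression workspaces are handed out,
and the diff below shows the result for fs/btrfs/compression.c: one workspace is
preallocated per compression type so acquisition can always make forward progress,
extra workspaces are allocated on demand up to roughly the number of online CPUs, and
callers wait for a free workspace instead of handling allocation failure. The following
is a minimal userspace sketch of that pattern, assuming POSIX threads; the names
(struct ws_pool, ws_get, ws_put) are hypothetical and are not the kernel API.

	/* Sketch only: models the btrfs workspace pool pattern in userspace. */
	#include <pthread.h>
	#include <stdlib.h>
	#include <unistd.h>

	struct ws {
		struct ws *next;	/* singly linked free list */
		char buf[4096];		/* per-workspace scratch memory */
	};

	struct ws_pool {
		pthread_mutex_t lock;	/* stands in for ws_lock */
		pthread_cond_t wait;	/* stands in for the wait queue */
		struct ws *idle;	/* list of free workspaces */
		int free_ws;		/* entries on 'idle' */
		int total_ws;		/* all allocated workspaces */
		int limit;		/* soft cap: number of online CPUs */
	};

	void ws_pool_init(struct ws_pool *p)
	{
		pthread_mutex_init(&p->lock, NULL);
		pthread_cond_init(&p->wait, NULL);
		p->idle = NULL;
		p->free_ws = 0;
		p->total_ws = 0;
		p->limit = (int)sysconf(_SC_NPROCESSORS_ONLN);

		/* Preallocate one workspace so ws_get() can always make progress. */
		p->idle = calloc(1, sizeof(*p->idle));
		if (p->idle) {
			p->free_ws = 1;
			p->total_ws = 1;
		}
	}

	/* Never fails: reuses an idle workspace, allocates one, or waits. */
	struct ws *ws_get(struct ws_pool *p)
	{
		struct ws *w;

		pthread_mutex_lock(&p->lock);
		for (;;) {
			if (p->idle) {			/* reuse an idle workspace */
				w = p->idle;
				p->idle = w->next;
				p->free_ws--;
				break;
			}
			if (p->total_ws >= p->limit) {	/* enough exist: wait for one */
				pthread_cond_wait(&p->wait, &p->lock);
				continue;
			}
			p->total_ws++;			/* allowed to allocate another */
			pthread_mutex_unlock(&p->lock);
			w = calloc(1, sizeof(*w));
			if (w)
				return w;
			pthread_mutex_lock(&p->lock);
			p->total_ws--;			/* allocation failed: retry instead of
							 * reporting an error (the kernel code
							 * adds a rate-limited warning here) */
		}
		pthread_mutex_unlock(&p->lock);
		return w;
	}

	void ws_put(struct ws_pool *p, struct ws *w)
	{
		pthread_mutex_lock(&p->lock);
		if (p->free_ws < p->limit) {		/* keep it around for reuse */
			w->next = p->idle;
			p->idle = w;
			p->free_ws++;
			w = NULL;
		} else {
			p->total_ws--;			/* over the cap: drop it */
		}
		pthread_mutex_unlock(&p->lock);
		free(w);				/* no-op if we kept it */
		pthread_cond_signal(&p->wait);		/* wake one waiter */
	}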
Diffstat (limited to 'fs/btrfs/compression.c')
-rw-r--r--  fs/btrfs/compression.c | 85
1 file changed, 61 insertions(+), 24 deletions(-)
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index ff61a41ac90b..658c39b70fba 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -743,8 +743,11 @@ out:
 static struct {
 	struct list_head idle_ws;
 	spinlock_t ws_lock;
-	int num_ws;
-	atomic_t alloc_ws;
+	/* Number of free workspaces */
+	int free_ws;
+	/* Total number of allocated workspaces */
+	atomic_t total_ws;
+	/* Waiters for a free workspace */
 	wait_queue_head_t ws_wait;
 } btrfs_comp_ws[BTRFS_COMPRESS_TYPES];
 
@@ -758,16 +761,34 @@ void __init btrfs_init_compress(void)
 	int i;
 
 	for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) {
+		struct list_head *workspace;
+
 		INIT_LIST_HEAD(&btrfs_comp_ws[i].idle_ws);
 		spin_lock_init(&btrfs_comp_ws[i].ws_lock);
-		atomic_set(&btrfs_comp_ws[i].alloc_ws, 0);
+		atomic_set(&btrfs_comp_ws[i].total_ws, 0);
 		init_waitqueue_head(&btrfs_comp_ws[i].ws_wait);
+
+		/*
+		 * Preallocate one workspace for each compression type so
+		 * we can guarantee forward progress in the worst case
+		 */
+		workspace = btrfs_compress_op[i]->alloc_workspace();
+		if (IS_ERR(workspace)) {
+			printk(KERN_WARNING
+	"BTRFS: cannot preallocate compression workspace, will try later");
+		} else {
+			atomic_set(&btrfs_comp_ws[i].total_ws, 1);
+			btrfs_comp_ws[i].free_ws = 1;
+			list_add(workspace, &btrfs_comp_ws[i].idle_ws);
+		}
 	}
 }
 
 /*
- * this finds an available workspace or allocates a new one
- * ERR_PTR is returned if things go bad.
+ * This finds an available workspace or allocates a new one.
+ * If it's not possible to allocate a new one, waits until there's one.
+ * Preallocation makes a forward progress guarantees and we do not return
+ * errors.
  */
 static struct list_head *find_workspace(int type)
 {
@@ -777,36 +798,58 @@ static struct list_head *find_workspace(int type)
 
 	struct list_head *idle_ws	= &btrfs_comp_ws[idx].idle_ws;
 	spinlock_t *ws_lock		= &btrfs_comp_ws[idx].ws_lock;
-	atomic_t *alloc_ws		= &btrfs_comp_ws[idx].alloc_ws;
+	atomic_t *total_ws		= &btrfs_comp_ws[idx].total_ws;
 	wait_queue_head_t *ws_wait	= &btrfs_comp_ws[idx].ws_wait;
-	int *num_ws			= &btrfs_comp_ws[idx].num_ws;
+	int *free_ws			= &btrfs_comp_ws[idx].free_ws;
 again:
 	spin_lock(ws_lock);
 	if (!list_empty(idle_ws)) {
 		workspace = idle_ws->next;
 		list_del(workspace);
-		(*num_ws)--;
+		(*free_ws)--;
 		spin_unlock(ws_lock);
 		return workspace;
 
 	}
-	if (atomic_read(alloc_ws) > cpus) {
+	if (atomic_read(total_ws) > cpus) {
 		DEFINE_WAIT(wait);
 
 		spin_unlock(ws_lock);
 		prepare_to_wait(ws_wait, &wait, TASK_UNINTERRUPTIBLE);
-		if (atomic_read(alloc_ws) > cpus && !*num_ws)
+		if (atomic_read(total_ws) > cpus && !*free_ws)
 			schedule();
 		finish_wait(ws_wait, &wait);
 		goto again;
 	}
-	atomic_inc(alloc_ws);
+	atomic_inc(total_ws);
 	spin_unlock(ws_lock);
 
 	workspace = btrfs_compress_op[idx]->alloc_workspace();
 	if (IS_ERR(workspace)) {
-		atomic_dec(alloc_ws);
+		atomic_dec(total_ws);
 		wake_up(ws_wait);
+
+		/*
+		 * Do not return the error but go back to waiting. There's a
+		 * workspace preallocated for each type and the compression
+		 * time is bounded so we get to a workspace eventually. This
+		 * makes our caller's life easier.
+		 *
+		 * To prevent silent and low-probability deadlocks (when the
+		 * initial preallocation fails), check if there are any
+		 * workspaces at all.
+		 */
+		if (atomic_read(total_ws) == 0) {
+			static DEFINE_RATELIMIT_STATE(_rs,
+					/* once per minute */ 60 * HZ,
+					/* no burst */ 1);
+
+			if (__ratelimit(&_rs)) {
+				printk(KERN_WARNING
+		    "no compression workspaces, low memory, retrying");
+			}
+		}
+		goto again;
 	}
 	return workspace;
 }
@@ -820,21 +863,21 @@ static void free_workspace(int type, struct list_head *workspace)
 	int idx = type - 1;
 	struct list_head *idle_ws	= &btrfs_comp_ws[idx].idle_ws;
 	spinlock_t *ws_lock		= &btrfs_comp_ws[idx].ws_lock;
-	atomic_t *alloc_ws		= &btrfs_comp_ws[idx].alloc_ws;
+	atomic_t *total_ws		= &btrfs_comp_ws[idx].total_ws;
 	wait_queue_head_t *ws_wait	= &btrfs_comp_ws[idx].ws_wait;
-	int *num_ws			= &btrfs_comp_ws[idx].num_ws;
+	int *free_ws			= &btrfs_comp_ws[idx].free_ws;
 
 	spin_lock(ws_lock);
-	if (*num_ws < num_online_cpus()) {
+	if (*free_ws < num_online_cpus()) {
 		list_add(workspace, idle_ws);
-		(*num_ws)++;
+		(*free_ws)++;
 		spin_unlock(ws_lock);
 		goto wake;
 	}
 	spin_unlock(ws_lock);
 
 	btrfs_compress_op[idx]->free_workspace(workspace);
-	atomic_dec(alloc_ws);
+	atomic_dec(total_ws);
 wake:
 	/*
 	 * Make sure counter is updated before we wake up waiters.
@@ -857,7 +900,7 @@ static void free_workspaces(void)
 			workspace = btrfs_comp_ws[i].idle_ws.next;
 			list_del(workspace);
 			btrfs_compress_op[i]->free_workspace(workspace);
-			atomic_dec(&btrfs_comp_ws[i].alloc_ws);
+			atomic_dec(&btrfs_comp_ws[i].total_ws);
 		}
 	}
 }
@@ -894,8 +937,6 @@ int btrfs_compress_pages(int type, struct address_space *mapping,
 	int ret;
 
 	workspace = find_workspace(type);
-	if (IS_ERR(workspace))
-		return PTR_ERR(workspace);
 
 	ret = btrfs_compress_op[type-1]->compress_pages(workspace, mapping,
 						      start, len, pages,
@@ -930,8 +971,6 @@ static int btrfs_decompress_biovec(int type, struct page **pages_in,
 	int ret;
 
 	workspace = find_workspace(type);
-	if (IS_ERR(workspace))
-		return PTR_ERR(workspace);
 
 	ret = btrfs_compress_op[type-1]->decompress_biovec(workspace, pages_in,
 							 disk_start,
@@ -952,8 +991,6 @@ int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page,
 	int ret;
 
 	workspace = find_workspace(type);
-	if (IS_ERR(workspace))
-		return PTR_ERR(workspace);
 
 	ret = btrfs_compress_op[type-1]->decompress(workspace, data_in,
 						  dest_page, start_byte,
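As the last three hunks show, once the workspace getter can no longer fail, the
IS_ERR()/PTR_ERR() checks in btrfs_compress_pages(), btrfs_decompress_biovec() and
btrfs_decompress() become dead code and are simply removed. A caller-side sketch in
terms of the hypothetical pool above (compress_one and its body are illustrative only):

	/*
	 * Hypothetical caller, mirroring how the btrfs_compress_pages() path is
	 * simplified by this change: ws_get() cannot fail, so no error path remains.
	 */
	void compress_one(struct ws_pool *pool)
	{
		struct ws *w = ws_get(pool);	/* always returns a workspace */

		/* ... use w->buf as scratch space for the actual compression ... */

		ws_put(pool, w);		/* return it and wake up a waiter */
	}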