aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorJan Kara <jack@suse.cz>2011-06-24 14:29:41 -0400
committerAl Viro <viro@zeniv.linux.org.uk>2011-07-20 20:47:45 -0400
commit9ea7df534ed2a18157434a496a12cf073ca00c52 (patch)
treef8fa09102093cbc60249f96ec4fb91985ae8659b /fs
parent582686915803e34adc8fdcd90bff7ca7f6a42221 (diff)
ext4: Rewrite ext4_page_mkwrite() to use generic helpers
Rewrite ext4_page_mkwrite() to use __block_page_mkwrite() helper. This removes the need of using i_alloc_sem to avoid races with truncate which seems to be the wrong locking order according to lock ordering documented in mm/rmap.c. Also calling ext4_da_write_begin() as used by the old code seems to be problematic because we can decide to flush delay-allocated blocks which will acquire s_umount semaphore - again creating unpleasant lock dependency if not directly a deadlock. Also add a check for frozen filesystem so that we don't busyloop in page fault when the filesystem is frozen. Signed-off-by: Jan Kara <jack@suse.cz> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Diffstat (limited to 'fs')
-rw-r--r--fs/ext4/inode.c106
1 file changed, 55 insertions, 51 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index e3126c05100..bd309764557 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -5843,80 +5843,84 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
5843 struct page *page = vmf->page; 5843 struct page *page = vmf->page;
5844 loff_t size; 5844 loff_t size;
5845 unsigned long len; 5845 unsigned long len;
5846 int ret = -EINVAL; 5846 int ret;
5847 void *fsdata;
5848 struct file *file = vma->vm_file; 5847 struct file *file = vma->vm_file;
5849 struct inode *inode = file->f_path.dentry->d_inode; 5848 struct inode *inode = file->f_path.dentry->d_inode;
5850 struct address_space *mapping = inode->i_mapping; 5849 struct address_space *mapping = inode->i_mapping;
5850 handle_t *handle;
5851 get_block_t *get_block;
5852 int retries = 0;
5851 5853
5852 /* 5854 /*
5853 * Get i_alloc_sem to stop truncates messing with the inode. We cannot 5855 * This check is racy but catches the common case. We rely on
5854 * get i_mutex because we are already holding mmap_sem. 5856 * __block_page_mkwrite() to do a reliable check.
5855 */ 5857 */
5856 down_read(&inode->i_alloc_sem); 5858 vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
5857 size = i_size_read(inode); 5859 /* Delalloc case is easy... */
5858 if (page->mapping != mapping || size <= page_offset(page) 5860 if (test_opt(inode->i_sb, DELALLOC) &&
5859 || !PageUptodate(page)) { 5861 !ext4_should_journal_data(inode) &&
5860 /* page got truncated from under us? */ 5862 !ext4_nonda_switch(inode->i_sb)) {
5861 goto out_unlock; 5863 do {
5864 ret = __block_page_mkwrite(vma, vmf,
5865 ext4_da_get_block_prep);
5866 } while (ret == -ENOSPC &&
5867 ext4_should_retry_alloc(inode->i_sb, &retries));
5868 goto out_ret;
5862 } 5869 }
5863 ret = 0;
5864 5870
5865 lock_page(page); 5871 lock_page(page);
5866 wait_on_page_writeback(page); 5872 size = i_size_read(inode);
5867 if (PageMappedToDisk(page)) { 5873 /* Page got truncated from under us? */
5868 up_read(&inode->i_alloc_sem); 5874 if (page->mapping != mapping || page_offset(page) > size) {
5869 return VM_FAULT_LOCKED; 5875 unlock_page(page);
5876 ret = VM_FAULT_NOPAGE;
5877 goto out;
5870 } 5878 }
5871 5879
5872 if (page->index == size >> PAGE_CACHE_SHIFT) 5880 if (page->index == size >> PAGE_CACHE_SHIFT)
5873 len = size & ~PAGE_CACHE_MASK; 5881 len = size & ~PAGE_CACHE_MASK;
5874 else 5882 else
5875 len = PAGE_CACHE_SIZE; 5883 len = PAGE_CACHE_SIZE;
5876
5877 /* 5884 /*
5878 * return if we have all the buffers mapped. This avoid 5885 * Return if we have all the buffers mapped. This avoids the need to do
5879 * the need to call write_begin/write_end which does a 5886 * journal_start/journal_stop which can block and take a long time
5880 * journal_start/journal_stop which can block and take
5881 * long time
5882 */ 5887 */
5883 if (page_has_buffers(page)) { 5888 if (page_has_buffers(page)) {
5884 if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL, 5889 if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL,
5885 ext4_bh_unmapped)) { 5890 ext4_bh_unmapped)) {
5886 up_read(&inode->i_alloc_sem); 5891 /* Wait so that we don't change page under IO */
5887 return VM_FAULT_LOCKED; 5892 wait_on_page_writeback(page);
5893 ret = VM_FAULT_LOCKED;
5894 goto out;
5888 } 5895 }
5889 } 5896 }
5890 unlock_page(page); 5897 unlock_page(page);
5891 /* 5898 /* OK, we need to fill the hole... */
5892 * OK, we need to fill the hole... Do write_begin write_end 5899 if (ext4_should_dioread_nolock(inode))
5893 * to do block allocation/reservation.We are not holding 5900 get_block = ext4_get_block_write;
5894 * inode.i__mutex here. That allow * parallel write_begin, 5901 else
5895 * write_end call. lock_page prevent this from happening 5902 get_block = ext4_get_block;
5896 * on the same page though 5903retry_alloc:
5897 */ 5904 handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode));
5898 ret = mapping->a_ops->write_begin(file, mapping, page_offset(page), 5905 if (IS_ERR(handle)) {
5899 len, AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata);
5900 if (ret < 0)
5901 goto out_unlock;
5902 ret = mapping->a_ops->write_end(file, mapping, page_offset(page),
5903 len, len, page, fsdata);
5904 if (ret < 0)
5905 goto out_unlock;
5906 ret = 0;
5907
5908 /*
5909 * write_begin/end might have created a dirty page and someone
5910 * could wander in and start the IO. Make sure that hasn't
5911 * happened.
5912 */
5913 lock_page(page);
5914 wait_on_page_writeback(page);
5915 up_read(&inode->i_alloc_sem);
5916 return VM_FAULT_LOCKED;
5917out_unlock:
5918 if (ret)
5919 ret = VM_FAULT_SIGBUS; 5906 ret = VM_FAULT_SIGBUS;
5920 up_read(&inode->i_alloc_sem); 5907 goto out;
5908 }
5909 ret = __block_page_mkwrite(vma, vmf, get_block);
5910 if (!ret && ext4_should_journal_data(inode)) {
5911 if (walk_page_buffers(handle, page_buffers(page), 0,
5912 PAGE_CACHE_SIZE, NULL, do_journal_get_write_access)) {
5913 unlock_page(page);
5914 ret = VM_FAULT_SIGBUS;
5915 goto out;
5916 }
5917 ext4_set_inode_state(inode, EXT4_STATE_JDATA);
5918 }
5919 ext4_journal_stop(handle);
5920 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
5921 goto retry_alloc;
5922out_ret:
5923 ret = block_page_mkwrite_return(ret);
5924out:
5921 return ret; 5925 return ret;
5922} 5926}