aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorJan Kara <jack@suse.cz>2011-06-24 14:29:41 -0400
committerAl Viro <viro@zeniv.linux.org.uk>2011-07-20 20:47:45 -0400
commit9ea7df534ed2a18157434a496a12cf073ca00c52 (patch)
treef8fa09102093cbc60249f96ec4fb91985ae8659b /fs
parent582686915803e34adc8fdcd90bff7ca7f6a42221 (diff)
ext4: Rewrite ext4_page_mkwrite() to use generic helpers
Rewrite ext4_page_mkwrite() to use __block_page_mkwrite() helper. This removes the need of using i_alloc_sem to avoid races with truncate which seems to be the wrong locking order according to lock ordering documented in mm/rmap.c. Also calling ext4_da_write_begin() as used by the old code seems to be problematic because we can decide to flush delay-allocated blocks which will acquire s_umount semaphore - again creating unpleasant lock dependency if not directly a deadlock. Also add a check for frozen filesystem so that we don't busyloop in page fault when the filesystem is frozen. Signed-off-by: Jan Kara <jack@suse.cz> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Diffstat (limited to 'fs')
-rw-r--r--fs/ext4/inode.c106
1 file changed, 55 insertions, 51 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index e3126c05100..bd309764557 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -5843,80 +5843,84 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
5843 struct page *page = vmf->page; 5843 struct page *page = vmf->page;
5844 loff_t size; 5844 loff_t size;
5845 unsigned long len; 5845 unsigned long len;
5846 int ret = -EINVAL; 5846 int ret;
5847 void *fsdata;
5848 struct file *file = vma->vm_file; 5847 struct file *file = vma->vm_file;
5849 struct inode *inode = file->f_path.dentry->d_inode; 5848 struct inode *inode = file->f_path.dentry->d_inode;
5850 struct address_space *mapping = inode->i_mapping; 5849 struct address_space *mapping = inode->i_mapping;
5850 handle_t *handle;
5851 get_block_t *get_block;
5852 int retries = 0;
5851 5853
5852 /* 5854 /*
5853 * Get i_alloc_sem to stop truncates messing with the inode. We cannot 5855 * This check is racy but catches the common case. We rely on
5854 * get i_mutex because we are already holding mmap_sem. 5856 * __block_page_mkwrite() to do a reliable check.
5855 */ 5857 */
5856 down_read(&inode->i_alloc_sem); 5858 vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
5857 size = i_size_read(inode); 5859 /* Delalloc case is easy... */
5858 if (page->mapping != mapping || size <= page_offset(page) 5860 if (test_opt(inode->i_sb, DELALLOC) &&
5859 || !PageUptodate(page)) { 5861 !ext4_should_journal_data(inode) &&
5860 /* page got truncated from under us? */ 5862 !ext4_nonda_switch(inode->i_sb)) {
5861 goto out_unlock; 5863 do {
5864 ret = __block_page_mkwrite(vma, vmf,
5865 ext4_da_get_block_prep);
5866 } while (ret == -ENOSPC &&
5867 ext4_should_retry_alloc(inode->i_sb, &retries));
5868 goto out_ret;
5862 } 5869 }
5863 ret = 0;
5864 5870
5865 lock_page(page); 5871 lock_page(page);
5866 wait_on_page_writeback(page); 5872 size = i_size_read(inode);
5867 if (PageMappedToDisk(page)) { 5873 /* Page got truncated from under us? */
5868 up_read(&inode->i_alloc_sem); 5874 if (page->mapping != mapping || page_offset(page) > size) {
5869 return VM_FAULT_LOCKED; 5875 unlock_page(page);
5876 ret = VM_FAULT_NOPAGE;
5877 goto out;
5870 } 5878 }
5871 5879
5872 if (page->index == size >> PAGE_CACHE_SHIFT) 5880 if (page->index == size >> PAGE_CACHE_SHIFT)
5873 len = size & ~PAGE_CACHE_MASK; 5881 len = size & ~PAGE_CACHE_MASK;
5874 else 5882 else
5875 len = PAGE_CACHE_SIZE; 5883 len = PAGE_CACHE_SIZE;
5876
5877 /* 5884 /*
5878 * return if we have all the buffers mapped. This avoid 5885 * Return if we have all the buffers mapped. This avoids the need to do
5879 * the need to call write_begin/write_end which does a 5886 * journal_start/journal_stop which can block and take a long time
5880 * journal_start/journal_stop which can block and take
5881 * long time
5882 */ 5887 */
5883 if (page_has_buffers(page)) { 5888 if (page_has_buffers(page)) {
5884 if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL, 5889 if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL,
5885 ext4_bh_unmapped)) { 5890 ext4_bh_unmapped)) {
5886 up_read(&inode->i_alloc_sem); 5891 /* Wait so that we don't change page under IO */
5887 return VM_FAULT_LOCKED; 5892 wait_on_page_writeback(page);
5893 ret = VM_FAULT_LOCKED;
5894 goto out;
5888 } 5895 }
5889 } 5896 }
5890 unlock_page(page); 5897 unlock_page(page);
5891 /* 5898 /* OK, we need to fill the hole... */
5892 * OK, we need to fill the hole... Do write_begin write_end 5899 if (ext4_should_dioread_nolock(inode))
5893 * to do block allocation/reservation.We are not holding 5900 get_block = ext4_get_block_write;
5894 * inode.i__mutex here. That allow * parallel write_begin, 5901 else
5895 * write_end call. lock_page prevent this from happening 5902 get_block = ext4_get_block;
5896 * on the same page though 5903retry_alloc:
5897 */ 5904 handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode));
5898 ret = mapping->a_ops->write_begin(file, mapping, page_offset(page), 5905 if (IS_ERR(handle)) {
5899 len, AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata);
5900 if (ret < 0)
5901 goto out_unlock;
5902 ret = mapping->a_ops->write_end(file, mapping, page_offset(page),
5903 len, len, page, fsdata);
5904 if (ret < 0)
5905 goto out_unlock;
5906 ret = 0;
5907
5908 /*
5909 * write_begin/end might have created a dirty page and someone
5910 * could wander in and start the IO. Make sure that hasn't
5911 * happened.
5912 */
5913 lock_page(page);
5914 wait_on_page_writeback(page);
5915 up_read(&inode->i_alloc_sem);
5916 return VM_FAULT_LOCKED;
5917out_unlock:
5918 if (ret)
5919 ret = VM_FAULT_SIGBUS; 5906 ret = VM_FAULT_SIGBUS;
5920 up_read(&inode->i_alloc_sem); 5907 goto out;
5908 }
5909 ret = __block_page_mkwrite(vma, vmf, get_block);
5910 if (!ret && ext4_should_journal_data(inode)) {
5911 if (walk_page_buffers(handle, page_buffers(page), 0,
5912 PAGE_CACHE_SIZE, NULL, do_journal_get_write_access)) {
5913 unlock_page(page);
5914 ret = VM_FAULT_SIGBUS;
5915 goto out;
5916 }
5917 ext4_set_inode_state(inode, EXT4_STATE_JDATA);
5918 }
5919 ext4_journal_stop(handle);
5920 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
5921 goto retry_alloc;
5922out_ret:
5923 ret = block_page_mkwrite_return(ret);
5924out:
5921 return ret; 5925 return ret;
5922} 5926}