diff options
author | Jan Kara <jack@suse.cz> | 2011-06-24 14:29:41 -0400 |
---|---|---|
committer | Al Viro <viro@zeniv.linux.org.uk> | 2011-07-20 20:47:45 -0400 |
commit | 9ea7df534ed2a18157434a496a12cf073ca00c52 (patch) | |
tree | f8fa09102093cbc60249f96ec4fb91985ae8659b /fs/ext4/inode.c | |
parent | 582686915803e34adc8fdcd90bff7ca7f6a42221 (diff) |
ext4: Rewrite ext4_page_mkwrite() to use generic helpers
Rewrite ext4_page_mkwrite() to use the __block_page_mkwrite() helper. This
removes the need to use i_alloc_sem to avoid races with truncate, which
appears to be the wrong locking order according to the lock ordering documented
in mm/rmap.c. Also, calling ext4_da_write_begin() as the old code did seems to
be problematic because we can decide to flush delay-allocated blocks, which
will acquire the s_umount semaphore — again creating an unpleasant lock
dependency, if not directly a deadlock.
Also add a check for a frozen filesystem so that we don't busy-loop in the
page fault handler when the filesystem is frozen.
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r-- | fs/ext4/inode.c | 106 |
1 files changed, 55 insertions, 51 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index e3126c051006..bd309764557f 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -5843,80 +5843,84 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
5843 | struct page *page = vmf->page; | 5843 | struct page *page = vmf->page; |
5844 | loff_t size; | 5844 | loff_t size; |
5845 | unsigned long len; | 5845 | unsigned long len; |
5846 | int ret = -EINVAL; | 5846 | int ret; |
5847 | void *fsdata; | ||
5848 | struct file *file = vma->vm_file; | 5847 | struct file *file = vma->vm_file; |
5849 | struct inode *inode = file->f_path.dentry->d_inode; | 5848 | struct inode *inode = file->f_path.dentry->d_inode; |
5850 | struct address_space *mapping = inode->i_mapping; | 5849 | struct address_space *mapping = inode->i_mapping; |
5850 | handle_t *handle; | ||
5851 | get_block_t *get_block; | ||
5852 | int retries = 0; | ||
5851 | 5853 | ||
5852 | /* | 5854 | /* |
5853 | * Get i_alloc_sem to stop truncates messing with the inode. We cannot | 5855 | * This check is racy but catches the common case. We rely on |
5854 | * get i_mutex because we are already holding mmap_sem. | 5856 | * __block_page_mkwrite() to do a reliable check. |
5855 | */ | 5857 | */ |
5856 | down_read(&inode->i_alloc_sem); | 5858 | vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); |
5857 | size = i_size_read(inode); | 5859 | /* Delalloc case is easy... */ |
5858 | if (page->mapping != mapping || size <= page_offset(page) | 5860 | if (test_opt(inode->i_sb, DELALLOC) && |
5859 | || !PageUptodate(page)) { | 5861 | !ext4_should_journal_data(inode) && |
5860 | /* page got truncated from under us? */ | 5862 | !ext4_nonda_switch(inode->i_sb)) { |
5861 | goto out_unlock; | 5863 | do { |
5864 | ret = __block_page_mkwrite(vma, vmf, | ||
5865 | ext4_da_get_block_prep); | ||
5866 | } while (ret == -ENOSPC && | ||
5867 | ext4_should_retry_alloc(inode->i_sb, &retries)); | ||
5868 | goto out_ret; | ||
5862 | } | 5869 | } |
5863 | ret = 0; | ||
5864 | 5870 | ||
5865 | lock_page(page); | 5871 | lock_page(page); |
5866 | wait_on_page_writeback(page); | 5872 | size = i_size_read(inode); |
5867 | if (PageMappedToDisk(page)) { | 5873 | /* Page got truncated from under us? */ |
5868 | up_read(&inode->i_alloc_sem); | 5874 | if (page->mapping != mapping || page_offset(page) > size) { |
5869 | return VM_FAULT_LOCKED; | 5875 | unlock_page(page); |
5876 | ret = VM_FAULT_NOPAGE; | ||
5877 | goto out; | ||
5870 | } | 5878 | } |
5871 | 5879 | ||
5872 | if (page->index == size >> PAGE_CACHE_SHIFT) | 5880 | if (page->index == size >> PAGE_CACHE_SHIFT) |
5873 | len = size & ~PAGE_CACHE_MASK; | 5881 | len = size & ~PAGE_CACHE_MASK; |
5874 | else | 5882 | else |
5875 | len = PAGE_CACHE_SIZE; | 5883 | len = PAGE_CACHE_SIZE; |
5876 | |||
5877 | /* | 5884 | /* |
5878 | * return if we have all the buffers mapped. This avoid | 5885 | * Return if we have all the buffers mapped. This avoids the need to do |
5879 | * the need to call write_begin/write_end which does a | 5886 | * journal_start/journal_stop which can block and take a long time |
5880 | * journal_start/journal_stop which can block and take | ||
5881 | * long time | ||
5882 | */ | 5887 | */ |
5883 | if (page_has_buffers(page)) { | 5888 | if (page_has_buffers(page)) { |
5884 | if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL, | 5889 | if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL, |
5885 | ext4_bh_unmapped)) { | 5890 | ext4_bh_unmapped)) { |
5886 | up_read(&inode->i_alloc_sem); | 5891 | /* Wait so that we don't change page under IO */ |
5887 | return VM_FAULT_LOCKED; | 5892 | wait_on_page_writeback(page); |
5893 | ret = VM_FAULT_LOCKED; | ||
5894 | goto out; | ||
5888 | } | 5895 | } |
5889 | } | 5896 | } |
5890 | unlock_page(page); | 5897 | unlock_page(page); |
5891 | /* | 5898 | /* OK, we need to fill the hole... */ |
5892 | * OK, we need to fill the hole... Do write_begin write_end | 5899 | if (ext4_should_dioread_nolock(inode)) |
5893 | * to do block allocation/reservation.We are not holding | 5900 | get_block = ext4_get_block_write; |
5894 | * inode.i__mutex here. That allow * parallel write_begin, | 5901 | else |
5895 | * write_end call. lock_page prevent this from happening | 5902 | get_block = ext4_get_block; |
5896 | * on the same page though | 5903 | retry_alloc: |
5897 | */ | 5904 | handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode)); |
5898 | ret = mapping->a_ops->write_begin(file, mapping, page_offset(page), | 5905 | if (IS_ERR(handle)) { |
5899 | len, AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata); | ||
5900 | if (ret < 0) | ||
5901 | goto out_unlock; | ||
5902 | ret = mapping->a_ops->write_end(file, mapping, page_offset(page), | ||
5903 | len, len, page, fsdata); | ||
5904 | if (ret < 0) | ||
5905 | goto out_unlock; | ||
5906 | ret = 0; | ||
5907 | |||
5908 | /* | ||
5909 | * write_begin/end might have created a dirty page and someone | ||
5910 | * could wander in and start the IO. Make sure that hasn't | ||
5911 | * happened. | ||
5912 | */ | ||
5913 | lock_page(page); | ||
5914 | wait_on_page_writeback(page); | ||
5915 | up_read(&inode->i_alloc_sem); | ||
5916 | return VM_FAULT_LOCKED; | ||
5917 | out_unlock: | ||
5918 | if (ret) | ||
5919 | ret = VM_FAULT_SIGBUS; | 5906 | ret = VM_FAULT_SIGBUS; |
5920 | up_read(&inode->i_alloc_sem); | 5907 | goto out; |
5908 | } | ||
5909 | ret = __block_page_mkwrite(vma, vmf, get_block); | ||
5910 | if (!ret && ext4_should_journal_data(inode)) { | ||
5911 | if (walk_page_buffers(handle, page_buffers(page), 0, | ||
5912 | PAGE_CACHE_SIZE, NULL, do_journal_get_write_access)) { | ||
5913 | unlock_page(page); | ||
5914 | ret = VM_FAULT_SIGBUS; | ||
5915 | goto out; | ||
5916 | } | ||
5917 | ext4_set_inode_state(inode, EXT4_STATE_JDATA); | ||
5918 | } | ||
5919 | ext4_journal_stop(handle); | ||
5920 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) | ||
5921 | goto retry_alloc; | ||
5922 | out_ret: | ||
5923 | ret = block_page_mkwrite_return(ret); | ||
5924 | out: | ||
5921 | return ret; | 5925 | return ret; |
5922 | } | 5926 | } |