diff options
Diffstat (limited to 'fs/ext4')
-rw-r--r-- | fs/ext4/acl.c | 4 | ||||
-rw-r--r-- | fs/ext4/acl.h | 2 | ||||
-rw-r--r-- | fs/ext4/ext4.h | 2 | ||||
-rw-r--r-- | fs/ext4/file.c | 21 | ||||
-rw-r--r-- | fs/ext4/fsync.c | 38 | ||||
-rw-r--r-- | fs/ext4/inode.c | 125 | ||||
-rw-r--r-- | fs/ext4/namei.c | 14 |
7 files changed, 133 insertions, 73 deletions
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c index 21eacd7b7d79..60d900fcc3db 100644 --- a/fs/ext4/acl.c +++ b/fs/ext4/acl.c | |||
@@ -238,11 +238,11 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type, | |||
238 | } | 238 | } |
239 | 239 | ||
240 | int | 240 | int |
241 | ext4_check_acl(struct inode *inode, int mask, unsigned int flags) | 241 | ext4_check_acl(struct inode *inode, int mask) |
242 | { | 242 | { |
243 | struct posix_acl *acl; | 243 | struct posix_acl *acl; |
244 | 244 | ||
245 | if (flags & IPERM_FLAG_RCU) { | 245 | if (mask & MAY_NOT_BLOCK) { |
246 | if (!negative_cached_acl(inode, ACL_TYPE_ACCESS)) | 246 | if (!negative_cached_acl(inode, ACL_TYPE_ACCESS)) |
247 | return -ECHILD; | 247 | return -ECHILD; |
248 | return -EAGAIN; | 248 | return -EAGAIN; |
diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h index dec821168fd4..9d843d5deac4 100644 --- a/fs/ext4/acl.h +++ b/fs/ext4/acl.h | |||
@@ -54,7 +54,7 @@ static inline int ext4_acl_count(size_t size) | |||
54 | #ifdef CONFIG_EXT4_FS_POSIX_ACL | 54 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
55 | 55 | ||
56 | /* acl.c */ | 56 | /* acl.c */ |
57 | extern int ext4_check_acl(struct inode *, int, unsigned int); | 57 | extern int ext4_check_acl(struct inode *, int); |
58 | extern int ext4_acl_chmod(struct inode *); | 58 | extern int ext4_acl_chmod(struct inode *); |
59 | extern int ext4_init_acl(handle_t *, struct inode *, struct inode *); | 59 | extern int ext4_init_acl(handle_t *, struct inode *, struct inode *); |
60 | 60 | ||
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 1921392cd708..fa44df879711 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -1758,7 +1758,7 @@ extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash, | |||
1758 | extern void ext4_htree_free_dir_info(struct dir_private_info *p); | 1758 | extern void ext4_htree_free_dir_info(struct dir_private_info *p); |
1759 | 1759 | ||
1760 | /* fsync.c */ | 1760 | /* fsync.c */ |
1761 | extern int ext4_sync_file(struct file *, int); | 1761 | extern int ext4_sync_file(struct file *, loff_t, loff_t, int); |
1762 | extern int ext4_flush_completed_IO(struct inode *); | 1762 | extern int ext4_flush_completed_IO(struct inode *); |
1763 | 1763 | ||
1764 | /* hash.c */ | 1764 | /* hash.c */ |
diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 2c0972322009..ce766f974b1d 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c | |||
@@ -236,6 +236,27 @@ loff_t ext4_llseek(struct file *file, loff_t offset, int origin) | |||
236 | } | 236 | } |
237 | offset += file->f_pos; | 237 | offset += file->f_pos; |
238 | break; | 238 | break; |
239 | case SEEK_DATA: | ||
240 | /* | ||
241 | * In the generic case the entire file is data, so as long as | ||
242 | * offset isn't at the end of the file then the offset is data. | ||
243 | */ | ||
244 | if (offset >= inode->i_size) { | ||
245 | mutex_unlock(&inode->i_mutex); | ||
246 | return -ENXIO; | ||
247 | } | ||
248 | break; | ||
249 | case SEEK_HOLE: | ||
250 | /* | ||
251 | * There is a virtual hole at the end of the file, so as long as | ||
252 | * offset isn't i_size or larger, return i_size. | ||
253 | */ | ||
254 | if (offset >= inode->i_size) { | ||
255 | mutex_unlock(&inode->i_mutex); | ||
256 | return -ENXIO; | ||
257 | } | ||
258 | offset = inode->i_size; | ||
259 | break; | ||
239 | } | 260 | } |
240 | 261 | ||
241 | if (offset < 0 || offset > maxbytes) { | 262 | if (offset < 0 || offset > maxbytes) { |
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index ce66d2fe826c..da3bed3e0c29 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c | |||
@@ -151,6 +151,32 @@ static int ext4_sync_parent(struct inode *inode) | |||
151 | return ret; | 151 | return ret; |
152 | } | 152 | } |
153 | 153 | ||
154 | /** | ||
155 | * __sync_inode - generic_file_fsync without the locking and filemap_write | ||
156 | * @inode: inode to sync | ||
157 | * @datasync: only sync essential metadata if true | ||
158 | * | ||
159 | * This is just generic_file_fsync without the locking. This is needed for | ||
160 | * nojournal mode to make sure this inode's data/metadata makes it to disk | ||
161 | * properly. The i_mutex should be held already. | ||
162 | */ | ||
163 | static int __sync_inode(struct inode *inode, int datasync) | ||
164 | { | ||
165 | int err; | ||
166 | int ret; | ||
167 | |||
168 | ret = sync_mapping_buffers(inode->i_mapping); | ||
169 | if (!(inode->i_state & I_DIRTY)) | ||
170 | return ret; | ||
171 | if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) | ||
172 | return ret; | ||
173 | |||
174 | err = sync_inode_metadata(inode, 1); | ||
175 | if (ret == 0) | ||
176 | ret = err; | ||
177 | return ret; | ||
178 | } | ||
179 | |||
154 | /* | 180 | /* |
155 | * akpm: A new design for ext4_sync_file(). | 181 | * akpm: A new design for ext4_sync_file(). |
156 | * | 182 | * |
@@ -165,7 +191,7 @@ static int ext4_sync_parent(struct inode *inode) | |||
165 | * i_mutex lock is held when entering and exiting this function | 191 | * i_mutex lock is held when entering and exiting this function |
166 | */ | 192 | */ |
167 | 193 | ||
168 | int ext4_sync_file(struct file *file, int datasync) | 194 | int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) |
169 | { | 195 | { |
170 | struct inode *inode = file->f_mapping->host; | 196 | struct inode *inode = file->f_mapping->host; |
171 | struct ext4_inode_info *ei = EXT4_I(inode); | 197 | struct ext4_inode_info *ei = EXT4_I(inode); |
@@ -178,15 +204,20 @@ int ext4_sync_file(struct file *file, int datasync) | |||
178 | 204 | ||
179 | trace_ext4_sync_file_enter(file, datasync); | 205 | trace_ext4_sync_file_enter(file, datasync); |
180 | 206 | ||
207 | ret = filemap_write_and_wait_range(inode->i_mapping, start, end); | ||
208 | if (ret) | ||
209 | return ret; | ||
210 | mutex_lock(&inode->i_mutex); | ||
211 | |||
181 | if (inode->i_sb->s_flags & MS_RDONLY) | 212 | if (inode->i_sb->s_flags & MS_RDONLY) |
182 | return 0; | 213 | goto out; |
183 | 214 | ||
184 | ret = ext4_flush_completed_IO(inode); | 215 | ret = ext4_flush_completed_IO(inode); |
185 | if (ret < 0) | 216 | if (ret < 0) |
186 | goto out; | 217 | goto out; |
187 | 218 | ||
188 | if (!journal) { | 219 | if (!journal) { |
189 | ret = generic_file_fsync(file, datasync); | 220 | ret = __sync_inode(inode, datasync); |
190 | if (!ret && !list_empty(&inode->i_dentry)) | 221 | if (!ret && !list_empty(&inode->i_dentry)) |
191 | ret = ext4_sync_parent(inode); | 222 | ret = ext4_sync_parent(inode); |
192 | goto out; | 223 | goto out; |
@@ -220,6 +251,7 @@ int ext4_sync_file(struct file *file, int datasync) | |||
220 | if (needs_barrier) | 251 | if (needs_barrier) |
221 | blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); | 252 | blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); |
222 | out: | 253 | out: |
254 | mutex_unlock(&inode->i_mutex); | ||
223 | trace_ext4_sync_file_exit(inode, ret); | 255 | trace_ext4_sync_file_exit(inode, ret); |
224 | return ret; | 256 | return ret; |
225 | } | 257 | } |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index e3126c051006..678cde834f19 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -3501,10 +3501,8 @@ retry: | |||
3501 | offset, nr_segs, | 3501 | offset, nr_segs, |
3502 | ext4_get_block, NULL, NULL, 0); | 3502 | ext4_get_block, NULL, NULL, 0); |
3503 | else { | 3503 | else { |
3504 | ret = blockdev_direct_IO(rw, iocb, inode, | 3504 | ret = blockdev_direct_IO(rw, iocb, inode, iov, |
3505 | inode->i_sb->s_bdev, iov, | 3505 | offset, nr_segs, ext4_get_block); |
3506 | offset, nr_segs, | ||
3507 | ext4_get_block, NULL); | ||
3508 | 3506 | ||
3509 | if (unlikely((rw & WRITE) && ret < 0)) { | 3507 | if (unlikely((rw & WRITE) && ret < 0)) { |
3510 | loff_t isize = i_size_read(inode); | 3508 | loff_t isize = i_size_read(inode); |
@@ -3575,6 +3573,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, | |||
3575 | ssize_t size, void *private, int ret, | 3573 | ssize_t size, void *private, int ret, |
3576 | bool is_async) | 3574 | bool is_async) |
3577 | { | 3575 | { |
3576 | struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; | ||
3578 | ext4_io_end_t *io_end = iocb->private; | 3577 | ext4_io_end_t *io_end = iocb->private; |
3579 | struct workqueue_struct *wq; | 3578 | struct workqueue_struct *wq; |
3580 | unsigned long flags; | 3579 | unsigned long flags; |
@@ -3596,6 +3595,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, | |||
3596 | out: | 3595 | out: |
3597 | if (is_async) | 3596 | if (is_async) |
3598 | aio_complete(iocb, ret, 0); | 3597 | aio_complete(iocb, ret, 0); |
3598 | inode_dio_done(inode); | ||
3599 | return; | 3599 | return; |
3600 | } | 3600 | } |
3601 | 3601 | ||
@@ -3616,6 +3616,9 @@ out: | |||
3616 | /* queue the work to convert unwritten extents to written */ | 3616 | /* queue the work to convert unwritten extents to written */ |
3617 | queue_work(wq, &io_end->work); | 3617 | queue_work(wq, &io_end->work); |
3618 | iocb->private = NULL; | 3618 | iocb->private = NULL; |
3619 | |||
3620 | /* XXX: probably should move into the real I/O completion handler */ | ||
3621 | inode_dio_done(inode); | ||
3619 | } | 3622 | } |
3620 | 3623 | ||
3621 | static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate) | 3624 | static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate) |
@@ -3748,11 +3751,13 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
3748 | EXT4_I(inode)->cur_aio_dio = iocb->private; | 3751 | EXT4_I(inode)->cur_aio_dio = iocb->private; |
3749 | } | 3752 | } |
3750 | 3753 | ||
3751 | ret = blockdev_direct_IO(rw, iocb, inode, | 3754 | ret = __blockdev_direct_IO(rw, iocb, inode, |
3752 | inode->i_sb->s_bdev, iov, | 3755 | inode->i_sb->s_bdev, iov, |
3753 | offset, nr_segs, | 3756 | offset, nr_segs, |
3754 | ext4_get_block_write, | 3757 | ext4_get_block_write, |
3755 | ext4_end_io_dio); | 3758 | ext4_end_io_dio, |
3759 | NULL, | ||
3760 | DIO_LOCKING | DIO_SKIP_HOLES); | ||
3756 | if (iocb->private) | 3761 | if (iocb->private) |
3757 | EXT4_I(inode)->cur_aio_dio = NULL; | 3762 | EXT4_I(inode)->cur_aio_dio = NULL; |
3758 | /* | 3763 | /* |
@@ -5351,6 +5356,8 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
5351 | } | 5356 | } |
5352 | 5357 | ||
5353 | if (attr->ia_valid & ATTR_SIZE) { | 5358 | if (attr->ia_valid & ATTR_SIZE) { |
5359 | inode_dio_wait(inode); | ||
5360 | |||
5354 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { | 5361 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { |
5355 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 5362 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
5356 | 5363 | ||
@@ -5843,80 +5850,84 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
5843 | struct page *page = vmf->page; | 5850 | struct page *page = vmf->page; |
5844 | loff_t size; | 5851 | loff_t size; |
5845 | unsigned long len; | 5852 | unsigned long len; |
5846 | int ret = -EINVAL; | 5853 | int ret; |
5847 | void *fsdata; | ||
5848 | struct file *file = vma->vm_file; | 5854 | struct file *file = vma->vm_file; |
5849 | struct inode *inode = file->f_path.dentry->d_inode; | 5855 | struct inode *inode = file->f_path.dentry->d_inode; |
5850 | struct address_space *mapping = inode->i_mapping; | 5856 | struct address_space *mapping = inode->i_mapping; |
5857 | handle_t *handle; | ||
5858 | get_block_t *get_block; | ||
5859 | int retries = 0; | ||
5851 | 5860 | ||
5852 | /* | 5861 | /* |
5853 | * Get i_alloc_sem to stop truncates messing with the inode. We cannot | 5862 | * This check is racy but catches the common case. We rely on |
5854 | * get i_mutex because we are already holding mmap_sem. | 5863 | * __block_page_mkwrite() to do a reliable check. |
5855 | */ | 5864 | */ |
5856 | down_read(&inode->i_alloc_sem); | 5865 | vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); |
5857 | size = i_size_read(inode); | 5866 | /* Delalloc case is easy... */ |
5858 | if (page->mapping != mapping || size <= page_offset(page) | 5867 | if (test_opt(inode->i_sb, DELALLOC) && |
5859 | || !PageUptodate(page)) { | 5868 | !ext4_should_journal_data(inode) && |
5860 | /* page got truncated from under us? */ | 5869 | !ext4_nonda_switch(inode->i_sb)) { |
5861 | goto out_unlock; | 5870 | do { |
5871 | ret = __block_page_mkwrite(vma, vmf, | ||
5872 | ext4_da_get_block_prep); | ||
5873 | } while (ret == -ENOSPC && | ||
5874 | ext4_should_retry_alloc(inode->i_sb, &retries)); | ||
5875 | goto out_ret; | ||
5862 | } | 5876 | } |
5863 | ret = 0; | ||
5864 | 5877 | ||
5865 | lock_page(page); | 5878 | lock_page(page); |
5866 | wait_on_page_writeback(page); | 5879 | size = i_size_read(inode); |
5867 | if (PageMappedToDisk(page)) { | 5880 | /* Page got truncated from under us? */ |
5868 | up_read(&inode->i_alloc_sem); | 5881 | if (page->mapping != mapping || page_offset(page) > size) { |
5869 | return VM_FAULT_LOCKED; | 5882 | unlock_page(page); |
5883 | ret = VM_FAULT_NOPAGE; | ||
5884 | goto out; | ||
5870 | } | 5885 | } |
5871 | 5886 | ||
5872 | if (page->index == size >> PAGE_CACHE_SHIFT) | 5887 | if (page->index == size >> PAGE_CACHE_SHIFT) |
5873 | len = size & ~PAGE_CACHE_MASK; | 5888 | len = size & ~PAGE_CACHE_MASK; |
5874 | else | 5889 | else |
5875 | len = PAGE_CACHE_SIZE; | 5890 | len = PAGE_CACHE_SIZE; |
5876 | |||
5877 | /* | 5891 | /* |
5878 | * return if we have all the buffers mapped. This avoid | 5892 | * Return if we have all the buffers mapped. This avoids the need to do |
5879 | * the need to call write_begin/write_end which does a | 5893 | * journal_start/journal_stop which can block and take a long time |
5880 | * journal_start/journal_stop which can block and take | ||
5881 | * long time | ||
5882 | */ | 5894 | */ |
5883 | if (page_has_buffers(page)) { | 5895 | if (page_has_buffers(page)) { |
5884 | if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL, | 5896 | if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL, |
5885 | ext4_bh_unmapped)) { | 5897 | ext4_bh_unmapped)) { |
5886 | up_read(&inode->i_alloc_sem); | 5898 | /* Wait so that we don't change page under IO */ |
5887 | return VM_FAULT_LOCKED; | 5899 | wait_on_page_writeback(page); |
5900 | ret = VM_FAULT_LOCKED; | ||
5901 | goto out; | ||
5888 | } | 5902 | } |
5889 | } | 5903 | } |
5890 | unlock_page(page); | 5904 | unlock_page(page); |
5891 | /* | 5905 | /* OK, we need to fill the hole... */ |
5892 | * OK, we need to fill the hole... Do write_begin write_end | 5906 | if (ext4_should_dioread_nolock(inode)) |
5893 | * to do block allocation/reservation.We are not holding | 5907 | get_block = ext4_get_block_write; |
5894 | * inode.i__mutex here. That allow * parallel write_begin, | 5908 | else |
5895 | * write_end call. lock_page prevent this from happening | 5909 | get_block = ext4_get_block; |
5896 | * on the same page though | 5910 | retry_alloc: |
5897 | */ | 5911 | handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode)); |
5898 | ret = mapping->a_ops->write_begin(file, mapping, page_offset(page), | 5912 | if (IS_ERR(handle)) { |
5899 | len, AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata); | ||
5900 | if (ret < 0) | ||
5901 | goto out_unlock; | ||
5902 | ret = mapping->a_ops->write_end(file, mapping, page_offset(page), | ||
5903 | len, len, page, fsdata); | ||
5904 | if (ret < 0) | ||
5905 | goto out_unlock; | ||
5906 | ret = 0; | ||
5907 | |||
5908 | /* | ||
5909 | * write_begin/end might have created a dirty page and someone | ||
5910 | * could wander in and start the IO. Make sure that hasn't | ||
5911 | * happened. | ||
5912 | */ | ||
5913 | lock_page(page); | ||
5914 | wait_on_page_writeback(page); | ||
5915 | up_read(&inode->i_alloc_sem); | ||
5916 | return VM_FAULT_LOCKED; | ||
5917 | out_unlock: | ||
5918 | if (ret) | ||
5919 | ret = VM_FAULT_SIGBUS; | 5913 | ret = VM_FAULT_SIGBUS; |
5920 | up_read(&inode->i_alloc_sem); | 5914 | goto out; |
5915 | } | ||
5916 | ret = __block_page_mkwrite(vma, vmf, get_block); | ||
5917 | if (!ret && ext4_should_journal_data(inode)) { | ||
5918 | if (walk_page_buffers(handle, page_buffers(page), 0, | ||
5919 | PAGE_CACHE_SIZE, NULL, do_journal_get_write_access)) { | ||
5920 | unlock_page(page); | ||
5921 | ret = VM_FAULT_SIGBUS; | ||
5922 | goto out; | ||
5923 | } | ||
5924 | ext4_set_inode_state(inode, EXT4_STATE_JDATA); | ||
5925 | } | ||
5926 | ext4_journal_stop(handle); | ||
5927 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) | ||
5928 | goto retry_alloc; | ||
5929 | out_ret: | ||
5930 | ret = block_page_mkwrite_return(ret); | ||
5931 | out: | ||
5921 | return ret; | 5932 | return ret; |
5922 | } | 5933 | } |
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index b754b7721f51..707d605bf769 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c | |||
@@ -1037,15 +1037,11 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, stru | |||
1037 | return ERR_PTR(-EIO); | 1037 | return ERR_PTR(-EIO); |
1038 | } | 1038 | } |
1039 | inode = ext4_iget(dir->i_sb, ino); | 1039 | inode = ext4_iget(dir->i_sb, ino); |
1040 | if (IS_ERR(inode)) { | 1040 | if (inode == ERR_PTR(-ESTALE)) { |
1041 | if (PTR_ERR(inode) == -ESTALE) { | 1041 | EXT4_ERROR_INODE(dir, |
1042 | EXT4_ERROR_INODE(dir, | 1042 | "deleted inode referenced: %u", |
1043 | "deleted inode referenced: %u", | 1043 | ino); |
1044 | ino); | 1044 | return ERR_PTR(-EIO); |
1045 | return ERR_PTR(-EIO); | ||
1046 | } else { | ||
1047 | return ERR_CAST(inode); | ||
1048 | } | ||
1049 | } | 1045 | } |
1050 | } | 1046 | } |
1051 | return d_splice_alias(inode, dentry); | 1047 | return d_splice_alias(inode, dentry); |