about summary refs log tree commit diff stats
path: root/fs/ext4
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4')
-rw-r--r--  fs/ext4/acl.c    4
-rw-r--r--  fs/ext4/acl.h    2
-rw-r--r--  fs/ext4/ext4.h   2
-rw-r--r--  fs/ext4/file.c   21
-rw-r--r--  fs/ext4/fsync.c  38
-rw-r--r--  fs/ext4/inode.c  125
-rw-r--r--  fs/ext4/namei.c  14
7 files changed, 133 insertions, 73 deletions
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index 21eacd7b7d79..60d900fcc3db 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -238,11 +238,11 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type,
238} 238}
239 239
240int 240int
241ext4_check_acl(struct inode *inode, int mask, unsigned int flags) 241ext4_check_acl(struct inode *inode, int mask)
242{ 242{
243 struct posix_acl *acl; 243 struct posix_acl *acl;
244 244
245 if (flags & IPERM_FLAG_RCU) { 245 if (mask & MAY_NOT_BLOCK) {
246 if (!negative_cached_acl(inode, ACL_TYPE_ACCESS)) 246 if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
247 return -ECHILD; 247 return -ECHILD;
248 return -EAGAIN; 248 return -EAGAIN;
diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h
index dec821168fd4..9d843d5deac4 100644
--- a/fs/ext4/acl.h
+++ b/fs/ext4/acl.h
@@ -54,7 +54,7 @@ static inline int ext4_acl_count(size_t size)
54#ifdef CONFIG_EXT4_FS_POSIX_ACL 54#ifdef CONFIG_EXT4_FS_POSIX_ACL
55 55
56/* acl.c */ 56/* acl.c */
57extern int ext4_check_acl(struct inode *, int, unsigned int); 57extern int ext4_check_acl(struct inode *, int);
58extern int ext4_acl_chmod(struct inode *); 58extern int ext4_acl_chmod(struct inode *);
59extern int ext4_init_acl(handle_t *, struct inode *, struct inode *); 59extern int ext4_init_acl(handle_t *, struct inode *, struct inode *);
60 60
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 1921392cd708..fa44df879711 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1758,7 +1758,7 @@ extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
1758extern void ext4_htree_free_dir_info(struct dir_private_info *p); 1758extern void ext4_htree_free_dir_info(struct dir_private_info *p);
1759 1759
1760/* fsync.c */ 1760/* fsync.c */
1761extern int ext4_sync_file(struct file *, int); 1761extern int ext4_sync_file(struct file *, loff_t, loff_t, int);
1762extern int ext4_flush_completed_IO(struct inode *); 1762extern int ext4_flush_completed_IO(struct inode *);
1763 1763
1764/* hash.c */ 1764/* hash.c */
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 2c0972322009..ce766f974b1d 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -236,6 +236,27 @@ loff_t ext4_llseek(struct file *file, loff_t offset, int origin)
236 } 236 }
237 offset += file->f_pos; 237 offset += file->f_pos;
238 break; 238 break;
239 case SEEK_DATA:
240 /*
241 * In the generic case the entire file is data, so as long as
242 * offset isn't at the end of the file then the offset is data.
243 */
244 if (offset >= inode->i_size) {
245 mutex_unlock(&inode->i_mutex);
246 return -ENXIO;
247 }
248 break;
249 case SEEK_HOLE:
250 /*
251 * There is a virtual hole at the end of the file, so as long as
252 * offset isn't i_size or larger, return i_size.
253 */
254 if (offset >= inode->i_size) {
255 mutex_unlock(&inode->i_mutex);
256 return -ENXIO;
257 }
258 offset = inode->i_size;
259 break;
239 } 260 }
240 261
241 if (offset < 0 || offset > maxbytes) { 262 if (offset < 0 || offset > maxbytes) {
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index ce66d2fe826c..da3bed3e0c29 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -151,6 +151,32 @@ static int ext4_sync_parent(struct inode *inode)
151 return ret; 151 return ret;
152} 152}
153 153
154/**
155 * __sync_file - generic_file_fsync without the locking and filemap_write
156 * @inode: inode to sync
157 * @datasync: only sync essential metadata if true
158 *
159 * This is just generic_file_fsync without the locking. This is needed for
160 * nojournal mode to make sure this inodes data/metadata makes it to disk
161 * properly. The i_mutex should be held already.
162 */
163static int __sync_inode(struct inode *inode, int datasync)
164{
165 int err;
166 int ret;
167
168 ret = sync_mapping_buffers(inode->i_mapping);
169 if (!(inode->i_state & I_DIRTY))
170 return ret;
171 if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
172 return ret;
173
174 err = sync_inode_metadata(inode, 1);
175 if (ret == 0)
176 ret = err;
177 return ret;
178}
179
154/* 180/*
155 * akpm: A new design for ext4_sync_file(). 181 * akpm: A new design for ext4_sync_file().
156 * 182 *
@@ -165,7 +191,7 @@ static int ext4_sync_parent(struct inode *inode)
165 * i_mutex lock is held when entering and exiting this function 191 * i_mutex lock is held when entering and exiting this function
166 */ 192 */
167 193
168int ext4_sync_file(struct file *file, int datasync) 194int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
169{ 195{
170 struct inode *inode = file->f_mapping->host; 196 struct inode *inode = file->f_mapping->host;
171 struct ext4_inode_info *ei = EXT4_I(inode); 197 struct ext4_inode_info *ei = EXT4_I(inode);
@@ -178,15 +204,20 @@ int ext4_sync_file(struct file *file, int datasync)
178 204
179 trace_ext4_sync_file_enter(file, datasync); 205 trace_ext4_sync_file_enter(file, datasync);
180 206
207 ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
208 if (ret)
209 return ret;
210 mutex_lock(&inode->i_mutex);
211
181 if (inode->i_sb->s_flags & MS_RDONLY) 212 if (inode->i_sb->s_flags & MS_RDONLY)
182 return 0; 213 goto out;
183 214
184 ret = ext4_flush_completed_IO(inode); 215 ret = ext4_flush_completed_IO(inode);
185 if (ret < 0) 216 if (ret < 0)
186 goto out; 217 goto out;
187 218
188 if (!journal) { 219 if (!journal) {
189 ret = generic_file_fsync(file, datasync); 220 ret = __sync_inode(inode, datasync);
190 if (!ret && !list_empty(&inode->i_dentry)) 221 if (!ret && !list_empty(&inode->i_dentry))
191 ret = ext4_sync_parent(inode); 222 ret = ext4_sync_parent(inode);
192 goto out; 223 goto out;
@@ -220,6 +251,7 @@ int ext4_sync_file(struct file *file, int datasync)
220 if (needs_barrier) 251 if (needs_barrier)
221 blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); 252 blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
222 out: 253 out:
254 mutex_unlock(&inode->i_mutex);
223 trace_ext4_sync_file_exit(inode, ret); 255 trace_ext4_sync_file_exit(inode, ret);
224 return ret; 256 return ret;
225} 257}
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index e3126c051006..678cde834f19 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3501,10 +3501,8 @@ retry:
3501 offset, nr_segs, 3501 offset, nr_segs,
3502 ext4_get_block, NULL, NULL, 0); 3502 ext4_get_block, NULL, NULL, 0);
3503 else { 3503 else {
3504 ret = blockdev_direct_IO(rw, iocb, inode, 3504 ret = blockdev_direct_IO(rw, iocb, inode, iov,
3505 inode->i_sb->s_bdev, iov, 3505 offset, nr_segs, ext4_get_block);
3506 offset, nr_segs,
3507 ext4_get_block, NULL);
3508 3506
3509 if (unlikely((rw & WRITE) && ret < 0)) { 3507 if (unlikely((rw & WRITE) && ret < 0)) {
3510 loff_t isize = i_size_read(inode); 3508 loff_t isize = i_size_read(inode);
@@ -3575,6 +3573,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
3575 ssize_t size, void *private, int ret, 3573 ssize_t size, void *private, int ret,
3576 bool is_async) 3574 bool is_async)
3577{ 3575{
3576 struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
3578 ext4_io_end_t *io_end = iocb->private; 3577 ext4_io_end_t *io_end = iocb->private;
3579 struct workqueue_struct *wq; 3578 struct workqueue_struct *wq;
3580 unsigned long flags; 3579 unsigned long flags;
@@ -3596,6 +3595,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
3596out: 3595out:
3597 if (is_async) 3596 if (is_async)
3598 aio_complete(iocb, ret, 0); 3597 aio_complete(iocb, ret, 0);
3598 inode_dio_done(inode);
3599 return; 3599 return;
3600 } 3600 }
3601 3601
@@ -3616,6 +3616,9 @@ out:
3616 /* queue the work to convert unwritten extents to written */ 3616 /* queue the work to convert unwritten extents to written */
3617 queue_work(wq, &io_end->work); 3617 queue_work(wq, &io_end->work);
3618 iocb->private = NULL; 3618 iocb->private = NULL;
3619
3620 /* XXX: probably should move into the real I/O completion handler */
3621 inode_dio_done(inode);
3619} 3622}
3620 3623
3621static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate) 3624static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
@@ -3748,11 +3751,13 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
3748 EXT4_I(inode)->cur_aio_dio = iocb->private; 3751 EXT4_I(inode)->cur_aio_dio = iocb->private;
3749 } 3752 }
3750 3753
3751 ret = blockdev_direct_IO(rw, iocb, inode, 3754 ret = __blockdev_direct_IO(rw, iocb, inode,
3752 inode->i_sb->s_bdev, iov, 3755 inode->i_sb->s_bdev, iov,
3753 offset, nr_segs, 3756 offset, nr_segs,
3754 ext4_get_block_write, 3757 ext4_get_block_write,
3755 ext4_end_io_dio); 3758 ext4_end_io_dio,
3759 NULL,
3760 DIO_LOCKING | DIO_SKIP_HOLES);
3756 if (iocb->private) 3761 if (iocb->private)
3757 EXT4_I(inode)->cur_aio_dio = NULL; 3762 EXT4_I(inode)->cur_aio_dio = NULL;
3758 /* 3763 /*
@@ -5351,6 +5356,8 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
5351 } 5356 }
5352 5357
5353 if (attr->ia_valid & ATTR_SIZE) { 5358 if (attr->ia_valid & ATTR_SIZE) {
5359 inode_dio_wait(inode);
5360
5354 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { 5361 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
5355 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 5362 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
5356 5363
@@ -5843,80 +5850,84 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
5843 struct page *page = vmf->page; 5850 struct page *page = vmf->page;
5844 loff_t size; 5851 loff_t size;
5845 unsigned long len; 5852 unsigned long len;
5846 int ret = -EINVAL; 5853 int ret;
5847 void *fsdata;
5848 struct file *file = vma->vm_file; 5854 struct file *file = vma->vm_file;
5849 struct inode *inode = file->f_path.dentry->d_inode; 5855 struct inode *inode = file->f_path.dentry->d_inode;
5850 struct address_space *mapping = inode->i_mapping; 5856 struct address_space *mapping = inode->i_mapping;
5857 handle_t *handle;
5858 get_block_t *get_block;
5859 int retries = 0;
5851 5860
5852 /* 5861 /*
5853 * Get i_alloc_sem to stop truncates messing with the inode. We cannot 5862 * This check is racy but catches the common case. We rely on
5854 * get i_mutex because we are already holding mmap_sem. 5863 * __block_page_mkwrite() to do a reliable check.
5855 */ 5864 */
5856 down_read(&inode->i_alloc_sem); 5865 vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
5857 size = i_size_read(inode); 5866 /* Delalloc case is easy... */
5858 if (page->mapping != mapping || size <= page_offset(page) 5867 if (test_opt(inode->i_sb, DELALLOC) &&
5859 || !PageUptodate(page)) { 5868 !ext4_should_journal_data(inode) &&
5860 /* page got truncated from under us? */ 5869 !ext4_nonda_switch(inode->i_sb)) {
5861 goto out_unlock; 5870 do {
5871 ret = __block_page_mkwrite(vma, vmf,
5872 ext4_da_get_block_prep);
5873 } while (ret == -ENOSPC &&
5874 ext4_should_retry_alloc(inode->i_sb, &retries));
5875 goto out_ret;
5862 } 5876 }
5863 ret = 0;
5864 5877
5865 lock_page(page); 5878 lock_page(page);
5866 wait_on_page_writeback(page); 5879 size = i_size_read(inode);
5867 if (PageMappedToDisk(page)) { 5880 /* Page got truncated from under us? */
5868 up_read(&inode->i_alloc_sem); 5881 if (page->mapping != mapping || page_offset(page) > size) {
5869 return VM_FAULT_LOCKED; 5882 unlock_page(page);
5883 ret = VM_FAULT_NOPAGE;
5884 goto out;
5870 } 5885 }
5871 5886
5872 if (page->index == size >> PAGE_CACHE_SHIFT) 5887 if (page->index == size >> PAGE_CACHE_SHIFT)
5873 len = size & ~PAGE_CACHE_MASK; 5888 len = size & ~PAGE_CACHE_MASK;
5874 else 5889 else
5875 len = PAGE_CACHE_SIZE; 5890 len = PAGE_CACHE_SIZE;
5876
5877 /* 5891 /*
5878 * return if we have all the buffers mapped. This avoid 5892 * Return if we have all the buffers mapped. This avoids the need to do
5879 * the need to call write_begin/write_end which does a 5893 * journal_start/journal_stop which can block and take a long time
5880 * journal_start/journal_stop which can block and take
5881 * long time
5882 */ 5894 */
5883 if (page_has_buffers(page)) { 5895 if (page_has_buffers(page)) {
5884 if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL, 5896 if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL,
5885 ext4_bh_unmapped)) { 5897 ext4_bh_unmapped)) {
5886 up_read(&inode->i_alloc_sem); 5898 /* Wait so that we don't change page under IO */
5887 return VM_FAULT_LOCKED; 5899 wait_on_page_writeback(page);
5900 ret = VM_FAULT_LOCKED;
5901 goto out;
5888 } 5902 }
5889 } 5903 }
5890 unlock_page(page); 5904 unlock_page(page);
5891 /* 5905 /* OK, we need to fill the hole... */
5892 * OK, we need to fill the hole... Do write_begin write_end 5906 if (ext4_should_dioread_nolock(inode))
5893 * to do block allocation/reservation.We are not holding 5907 get_block = ext4_get_block_write;
5894 * inode.i__mutex here. That allow * parallel write_begin, 5908 else
5895 * write_end call. lock_page prevent this from happening 5909 get_block = ext4_get_block;
5896 * on the same page though 5910retry_alloc:
5897 */ 5911 handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode));
5898 ret = mapping->a_ops->write_begin(file, mapping, page_offset(page), 5912 if (IS_ERR(handle)) {
5899 len, AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata);
5900 if (ret < 0)
5901 goto out_unlock;
5902 ret = mapping->a_ops->write_end(file, mapping, page_offset(page),
5903 len, len, page, fsdata);
5904 if (ret < 0)
5905 goto out_unlock;
5906 ret = 0;
5907
5908 /*
5909 * write_begin/end might have created a dirty page and someone
5910 * could wander in and start the IO. Make sure that hasn't
5911 * happened.
5912 */
5913 lock_page(page);
5914 wait_on_page_writeback(page);
5915 up_read(&inode->i_alloc_sem);
5916 return VM_FAULT_LOCKED;
5917out_unlock:
5918 if (ret)
5919 ret = VM_FAULT_SIGBUS; 5913 ret = VM_FAULT_SIGBUS;
5920 up_read(&inode->i_alloc_sem); 5914 goto out;
5915 }
5916 ret = __block_page_mkwrite(vma, vmf, get_block);
5917 if (!ret && ext4_should_journal_data(inode)) {
5918 if (walk_page_buffers(handle, page_buffers(page), 0,
5919 PAGE_CACHE_SIZE, NULL, do_journal_get_write_access)) {
5920 unlock_page(page);
5921 ret = VM_FAULT_SIGBUS;
5922 goto out;
5923 }
5924 ext4_set_inode_state(inode, EXT4_STATE_JDATA);
5925 }
5926 ext4_journal_stop(handle);
5927 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
5928 goto retry_alloc;
5929out_ret:
5930 ret = block_page_mkwrite_return(ret);
5931out:
5921 return ret; 5932 return ret;
5922} 5933}
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index b754b7721f51..707d605bf769 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1037,15 +1037,11 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, stru
1037 return ERR_PTR(-EIO); 1037 return ERR_PTR(-EIO);
1038 } 1038 }
1039 inode = ext4_iget(dir->i_sb, ino); 1039 inode = ext4_iget(dir->i_sb, ino);
1040 if (IS_ERR(inode)) { 1040 if (inode == ERR_PTR(-ESTALE)) {
1041 if (PTR_ERR(inode) == -ESTALE) { 1041 EXT4_ERROR_INODE(dir,
1042 EXT4_ERROR_INODE(dir, 1042 "deleted inode referenced: %u",
1043 "deleted inode referenced: %u", 1043 ino);
1044 ino); 1044 return ERR_PTR(-EIO);
1045 return ERR_PTR(-EIO);
1046 } else {
1047 return ERR_CAST(inode);
1048 }
1049 } 1045 }
1050 } 1046 }
1051 return d_splice_alias(inode, dentry); 1047 return d_splice_alias(inode, dentry);