diff options
Diffstat (limited to 'fs')
141 files changed, 2597 insertions, 2228 deletions
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c index c71e88602ff4..cc1cfae726b3 100644 --- a/fs/9p/vfs_addr.c +++ b/fs/9p/vfs_addr.c | |||
| @@ -259,8 +259,7 @@ static int v9fs_launder_page(struct page *page) | |||
| 259 | * | 259 | * |
| 260 | */ | 260 | */ |
| 261 | static ssize_t | 261 | static ssize_t |
| 262 | v9fs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, | 262 | v9fs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t pos) |
| 263 | loff_t pos, unsigned long nr_segs) | ||
| 264 | { | 263 | { |
| 265 | /* | 264 | /* |
| 266 | * FIXME | 265 | * FIXME |
| @@ -269,7 +268,7 @@ v9fs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, | |||
| 269 | */ | 268 | */ |
| 270 | p9_debug(P9_DEBUG_VFS, "v9fs_direct_IO: v9fs_direct_IO (%s) off/no(%lld/%lu) EINVAL\n", | 269 | p9_debug(P9_DEBUG_VFS, "v9fs_direct_IO: v9fs_direct_IO (%s) off/no(%lld/%lu) EINVAL\n", |
| 271 | iocb->ki_filp->f_path.dentry->d_name.name, | 270 | iocb->ki_filp->f_path.dentry->d_name.name, |
| 272 | (long long)pos, nr_segs); | 271 | (long long)pos, iter->nr_segs); |
| 273 | 272 | ||
| 274 | return -EINVAL; | 273 | return -EINVAL; |
| 275 | } | 274 | } |
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index 96e550760699..520c11c2dcca 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c | |||
| @@ -692,7 +692,7 @@ v9fs_cached_file_read(struct file *filp, char __user *data, size_t count, | |||
| 692 | { | 692 | { |
| 693 | if (filp->f_flags & O_DIRECT) | 693 | if (filp->f_flags & O_DIRECT) |
| 694 | return v9fs_direct_read(filp, data, count, offset); | 694 | return v9fs_direct_read(filp, data, count, offset); |
| 695 | return do_sync_read(filp, data, count, offset); | 695 | return new_sync_read(filp, data, count, offset); |
| 696 | } | 696 | } |
| 697 | 697 | ||
| 698 | /** | 698 | /** |
| @@ -760,7 +760,7 @@ err_out: | |||
| 760 | 760 | ||
| 761 | buff_write: | 761 | buff_write: |
| 762 | mutex_unlock(&inode->i_mutex); | 762 | mutex_unlock(&inode->i_mutex); |
| 763 | return do_sync_write(filp, data, count, offsetp); | 763 | return new_sync_write(filp, data, count, offsetp); |
| 764 | } | 764 | } |
| 765 | 765 | ||
| 766 | /** | 766 | /** |
| @@ -778,7 +778,7 @@ v9fs_cached_file_write(struct file *filp, const char __user * data, | |||
| 778 | 778 | ||
| 779 | if (filp->f_flags & O_DIRECT) | 779 | if (filp->f_flags & O_DIRECT) |
| 780 | return v9fs_direct_write(filp, data, count, offset); | 780 | return v9fs_direct_write(filp, data, count, offset); |
| 781 | return do_sync_write(filp, data, count, offset); | 781 | return new_sync_write(filp, data, count, offset); |
| 782 | } | 782 | } |
| 783 | 783 | ||
| 784 | 784 | ||
| @@ -847,8 +847,8 @@ const struct file_operations v9fs_cached_file_operations = { | |||
| 847 | .llseek = generic_file_llseek, | 847 | .llseek = generic_file_llseek, |
| 848 | .read = v9fs_cached_file_read, | 848 | .read = v9fs_cached_file_read, |
| 849 | .write = v9fs_cached_file_write, | 849 | .write = v9fs_cached_file_write, |
| 850 | .aio_read = generic_file_aio_read, | 850 | .read_iter = generic_file_read_iter, |
| 851 | .aio_write = generic_file_aio_write, | 851 | .write_iter = generic_file_write_iter, |
| 852 | .open = v9fs_file_open, | 852 | .open = v9fs_file_open, |
| 853 | .release = v9fs_dir_release, | 853 | .release = v9fs_dir_release, |
| 854 | .lock = v9fs_file_lock, | 854 | .lock = v9fs_file_lock, |
| @@ -860,8 +860,8 @@ const struct file_operations v9fs_cached_file_operations_dotl = { | |||
| 860 | .llseek = generic_file_llseek, | 860 | .llseek = generic_file_llseek, |
| 861 | .read = v9fs_cached_file_read, | 861 | .read = v9fs_cached_file_read, |
| 862 | .write = v9fs_cached_file_write, | 862 | .write = v9fs_cached_file_write, |
| 863 | .aio_read = generic_file_aio_read, | 863 | .read_iter = generic_file_read_iter, |
| 864 | .aio_write = generic_file_aio_write, | 864 | .write_iter = generic_file_write_iter, |
| 865 | .open = v9fs_file_open, | 865 | .open = v9fs_file_open, |
| 866 | .release = v9fs_dir_release, | 866 | .release = v9fs_dir_release, |
| 867 | .lock = v9fs_file_lock_dotl, | 867 | .lock = v9fs_file_lock_dotl, |
diff --git a/fs/adfs/file.c b/fs/adfs/file.c index a36da5382b40..07c9edce5aa7 100644 --- a/fs/adfs/file.c +++ b/fs/adfs/file.c | |||
| @@ -23,12 +23,12 @@ | |||
| 23 | 23 | ||
| 24 | const struct file_operations adfs_file_operations = { | 24 | const struct file_operations adfs_file_operations = { |
| 25 | .llseek = generic_file_llseek, | 25 | .llseek = generic_file_llseek, |
| 26 | .read = do_sync_read, | 26 | .read = new_sync_read, |
| 27 | .aio_read = generic_file_aio_read, | 27 | .read_iter = generic_file_read_iter, |
| 28 | .mmap = generic_file_mmap, | 28 | .mmap = generic_file_mmap, |
| 29 | .fsync = generic_file_fsync, | 29 | .fsync = generic_file_fsync, |
| 30 | .write = do_sync_write, | 30 | .write = new_sync_write, |
| 31 | .aio_write = generic_file_aio_write, | 31 | .write_iter = generic_file_write_iter, |
| 32 | .splice_read = generic_file_splice_read, | 32 | .splice_read = generic_file_splice_read, |
| 33 | }; | 33 | }; |
| 34 | 34 | ||
diff --git a/fs/affs/file.c b/fs/affs/file.c index 0270303388ee..a7fe57d2cd9a 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c | |||
| @@ -27,10 +27,10 @@ static int affs_file_release(struct inode *inode, struct file *filp); | |||
| 27 | 27 | ||
| 28 | const struct file_operations affs_file_operations = { | 28 | const struct file_operations affs_file_operations = { |
| 29 | .llseek = generic_file_llseek, | 29 | .llseek = generic_file_llseek, |
| 30 | .read = do_sync_read, | 30 | .read = new_sync_read, |
| 31 | .aio_read = generic_file_aio_read, | 31 | .read_iter = generic_file_read_iter, |
| 32 | .write = do_sync_write, | 32 | .write = new_sync_write, |
| 33 | .aio_write = generic_file_aio_write, | 33 | .write_iter = generic_file_write_iter, |
| 34 | .mmap = generic_file_mmap, | 34 | .mmap = generic_file_mmap, |
| 35 | .open = affs_file_open, | 35 | .open = affs_file_open, |
| 36 | .release = affs_file_release, | 36 | .release = affs_file_release, |
diff --git a/fs/afs/file.c b/fs/afs/file.c index 66d50fe2ee45..932ce07948b3 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c | |||
| @@ -31,10 +31,10 @@ const struct file_operations afs_file_operations = { | |||
| 31 | .open = afs_open, | 31 | .open = afs_open, |
| 32 | .release = afs_release, | 32 | .release = afs_release, |
| 33 | .llseek = generic_file_llseek, | 33 | .llseek = generic_file_llseek, |
| 34 | .read = do_sync_read, | 34 | .read = new_sync_read, |
| 35 | .write = do_sync_write, | 35 | .write = new_sync_write, |
| 36 | .aio_read = generic_file_aio_read, | 36 | .read_iter = generic_file_read_iter, |
| 37 | .aio_write = afs_file_write, | 37 | .write_iter = afs_file_write, |
| 38 | .mmap = generic_file_readonly_mmap, | 38 | .mmap = generic_file_readonly_mmap, |
| 39 | .splice_read = generic_file_splice_read, | 39 | .splice_read = generic_file_splice_read, |
| 40 | .fsync = afs_fsync, | 40 | .fsync = afs_fsync, |
diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 590b55f46d61..71d5982312f3 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h | |||
| @@ -747,8 +747,7 @@ extern int afs_write_end(struct file *file, struct address_space *mapping, | |||
| 747 | extern int afs_writepage(struct page *, struct writeback_control *); | 747 | extern int afs_writepage(struct page *, struct writeback_control *); |
| 748 | extern int afs_writepages(struct address_space *, struct writeback_control *); | 748 | extern int afs_writepages(struct address_space *, struct writeback_control *); |
| 749 | extern void afs_pages_written_back(struct afs_vnode *, struct afs_call *); | 749 | extern void afs_pages_written_back(struct afs_vnode *, struct afs_call *); |
| 750 | extern ssize_t afs_file_write(struct kiocb *, const struct iovec *, | 750 | extern ssize_t afs_file_write(struct kiocb *, struct iov_iter *); |
| 751 | unsigned long, loff_t); | ||
| 752 | extern int afs_writeback_all(struct afs_vnode *); | 751 | extern int afs_writeback_all(struct afs_vnode *); |
| 753 | extern int afs_fsync(struct file *, loff_t, loff_t, int); | 752 | extern int afs_fsync(struct file *, loff_t, loff_t, int); |
| 754 | 753 | ||
diff --git a/fs/afs/write.c b/fs/afs/write.c index a890db4b9898..ab6adfd52516 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c | |||
| @@ -625,15 +625,14 @@ void afs_pages_written_back(struct afs_vnode *vnode, struct afs_call *call) | |||
| 625 | /* | 625 | /* |
| 626 | * write to an AFS file | 626 | * write to an AFS file |
| 627 | */ | 627 | */ |
| 628 | ssize_t afs_file_write(struct kiocb *iocb, const struct iovec *iov, | 628 | ssize_t afs_file_write(struct kiocb *iocb, struct iov_iter *from) |
| 629 | unsigned long nr_segs, loff_t pos) | ||
| 630 | { | 629 | { |
| 631 | struct afs_vnode *vnode = AFS_FS_I(file_inode(iocb->ki_filp)); | 630 | struct afs_vnode *vnode = AFS_FS_I(file_inode(iocb->ki_filp)); |
| 632 | ssize_t result; | 631 | ssize_t result; |
| 633 | size_t count = iov_length(iov, nr_segs); | 632 | size_t count = iov_iter_count(from); |
| 634 | 633 | ||
| 635 | _enter("{%x.%u},{%zu},%lu,", | 634 | _enter("{%x.%u},{%zu},", |
| 636 | vnode->fid.vid, vnode->fid.vnode, count, nr_segs); | 635 | vnode->fid.vid, vnode->fid.vnode, count); |
| 637 | 636 | ||
| 638 | if (IS_SWAPFILE(&vnode->vfs_inode)) { | 637 | if (IS_SWAPFILE(&vnode->vfs_inode)) { |
| 639 | printk(KERN_INFO | 638 | printk(KERN_INFO |
| @@ -644,7 +643,7 @@ ssize_t afs_file_write(struct kiocb *iocb, const struct iovec *iov, | |||
| 644 | if (!count) | 643 | if (!count) |
| 645 | return 0; | 644 | return 0; |
| 646 | 645 | ||
| 647 | result = generic_file_aio_write(iocb, iov, nr_segs, pos); | 646 | result = generic_file_write_iter(iocb, from); |
| 648 | if (IS_ERR_VALUE(result)) { | 647 | if (IS_ERR_VALUE(result)) { |
| 649 | _leave(" = %zd", result); | 648 | _leave(" = %zd", result); |
| 650 | return result; | 649 | return result; |
| @@ -477,7 +477,7 @@ void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel) | |||
| 477 | } | 477 | } |
| 478 | EXPORT_SYMBOL(kiocb_set_cancel_fn); | 478 | EXPORT_SYMBOL(kiocb_set_cancel_fn); |
| 479 | 479 | ||
| 480 | static int kiocb_cancel(struct kioctx *ctx, struct kiocb *kiocb) | 480 | static int kiocb_cancel(struct kiocb *kiocb) |
| 481 | { | 481 | { |
| 482 | kiocb_cancel_fn *old, *cancel; | 482 | kiocb_cancel_fn *old, *cancel; |
| 483 | 483 | ||
| @@ -538,7 +538,7 @@ static void free_ioctx_users(struct percpu_ref *ref) | |||
| 538 | struct kiocb, ki_list); | 538 | struct kiocb, ki_list); |
| 539 | 539 | ||
| 540 | list_del_init(&req->ki_list); | 540 | list_del_init(&req->ki_list); |
| 541 | kiocb_cancel(ctx, req); | 541 | kiocb_cancel(req); |
| 542 | } | 542 | } |
| 543 | 543 | ||
| 544 | spin_unlock_irq(&ctx->ctx_lock); | 544 | spin_unlock_irq(&ctx->ctx_lock); |
| @@ -727,42 +727,42 @@ err: | |||
| 727 | * when the processes owning a context have all exited to encourage | 727 | * when the processes owning a context have all exited to encourage |
| 728 | * the rapid destruction of the kioctx. | 728 | * the rapid destruction of the kioctx. |
| 729 | */ | 729 | */ |
| 730 | static void kill_ioctx(struct mm_struct *mm, struct kioctx *ctx, | 730 | static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx, |
| 731 | struct completion *requests_done) | 731 | struct completion *requests_done) |
| 732 | { | 732 | { |
| 733 | if (!atomic_xchg(&ctx->dead, 1)) { | 733 | struct kioctx_table *table; |
| 734 | struct kioctx_table *table; | ||
| 735 | 734 | ||
| 736 | spin_lock(&mm->ioctx_lock); | 735 | if (atomic_xchg(&ctx->dead, 1)) |
| 737 | rcu_read_lock(); | 736 | return -EINVAL; |
| 738 | table = rcu_dereference(mm->ioctx_table); | ||
| 739 | 737 | ||
| 740 | WARN_ON(ctx != table->table[ctx->id]); | ||
| 741 | table->table[ctx->id] = NULL; | ||
| 742 | rcu_read_unlock(); | ||
| 743 | spin_unlock(&mm->ioctx_lock); | ||
| 744 | 738 | ||
| 745 | /* percpu_ref_kill() will do the necessary call_rcu() */ | 739 | spin_lock(&mm->ioctx_lock); |
| 746 | wake_up_all(&ctx->wait); | 740 | rcu_read_lock(); |
| 741 | table = rcu_dereference(mm->ioctx_table); | ||
| 747 | 742 | ||
| 748 | /* | 743 | WARN_ON(ctx != table->table[ctx->id]); |
| 749 | * It'd be more correct to do this in free_ioctx(), after all | 744 | table->table[ctx->id] = NULL; |
| 750 | * the outstanding kiocbs have finished - but by then io_destroy | 745 | rcu_read_unlock(); |
| 751 | * has already returned, so io_setup() could potentially return | 746 | spin_unlock(&mm->ioctx_lock); |
| 752 | * -EAGAIN with no ioctxs actually in use (as far as userspace | ||
| 753 | * could tell). | ||
| 754 | */ | ||
| 755 | aio_nr_sub(ctx->max_reqs); | ||
| 756 | 747 | ||
| 757 | if (ctx->mmap_size) | 748 | /* percpu_ref_kill() will do the necessary call_rcu() */ |
| 758 | vm_munmap(ctx->mmap_base, ctx->mmap_size); | 749 | wake_up_all(&ctx->wait); |
| 759 | 750 | ||
| 760 | ctx->requests_done = requests_done; | 751 | /* |
| 761 | percpu_ref_kill(&ctx->users); | 752 | * It'd be more correct to do this in free_ioctx(), after all |
| 762 | } else { | 753 | * the outstanding kiocbs have finished - but by then io_destroy |
| 763 | if (requests_done) | 754 | * has already returned, so io_setup() could potentially return |
| 764 | complete(requests_done); | 755 | * -EAGAIN with no ioctxs actually in use (as far as userspace |
| 765 | } | 756 | * could tell). |
| 757 | */ | ||
| 758 | aio_nr_sub(ctx->max_reqs); | ||
| 759 | |||
| 760 | if (ctx->mmap_size) | ||
| 761 | vm_munmap(ctx->mmap_base, ctx->mmap_size); | ||
| 762 | |||
| 763 | ctx->requests_done = requests_done; | ||
| 764 | percpu_ref_kill(&ctx->users); | ||
| 765 | return 0; | ||
| 766 | } | 766 | } |
| 767 | 767 | ||
| 768 | /* wait_on_sync_kiocb: | 768 | /* wait_on_sync_kiocb: |
| @@ -1021,6 +1021,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2) | |||
| 1021 | 1021 | ||
| 1022 | /* everything turned out well, dispose of the aiocb. */ | 1022 | /* everything turned out well, dispose of the aiocb. */ |
| 1023 | kiocb_free(iocb); | 1023 | kiocb_free(iocb); |
| 1024 | put_reqs_available(ctx, 1); | ||
| 1024 | 1025 | ||
| 1025 | /* | 1026 | /* |
| 1026 | * We have to order our ring_info tail store above and test | 1027 | * We have to order our ring_info tail store above and test |
| @@ -1062,6 +1063,9 @@ static long aio_read_events_ring(struct kioctx *ctx, | |||
| 1062 | if (head == tail) | 1063 | if (head == tail) |
| 1063 | goto out; | 1064 | goto out; |
| 1064 | 1065 | ||
| 1066 | head %= ctx->nr_events; | ||
| 1067 | tail %= ctx->nr_events; | ||
| 1068 | |||
| 1065 | while (ret < nr) { | 1069 | while (ret < nr) { |
| 1066 | long avail; | 1070 | long avail; |
| 1067 | struct io_event *ev; | 1071 | struct io_event *ev; |
| @@ -1100,8 +1104,6 @@ static long aio_read_events_ring(struct kioctx *ctx, | |||
| 1100 | flush_dcache_page(ctx->ring_pages[0]); | 1104 | flush_dcache_page(ctx->ring_pages[0]); |
| 1101 | 1105 | ||
| 1102 | pr_debug("%li h%u t%u\n", ret, head, tail); | 1106 | pr_debug("%li h%u t%u\n", ret, head, tail); |
| 1103 | |||
| 1104 | put_reqs_available(ctx, ret); | ||
| 1105 | out: | 1107 | out: |
| 1106 | mutex_unlock(&ctx->ring_lock); | 1108 | mutex_unlock(&ctx->ring_lock); |
| 1107 | 1109 | ||
| @@ -1219,21 +1221,23 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx) | |||
| 1219 | if (likely(NULL != ioctx)) { | 1221 | if (likely(NULL != ioctx)) { |
| 1220 | struct completion requests_done = | 1222 | struct completion requests_done = |
| 1221 | COMPLETION_INITIALIZER_ONSTACK(requests_done); | 1223 | COMPLETION_INITIALIZER_ONSTACK(requests_done); |
| 1224 | int ret; | ||
| 1222 | 1225 | ||
| 1223 | /* Pass requests_done to kill_ioctx() where it can be set | 1226 | /* Pass requests_done to kill_ioctx() where it can be set |
| 1224 | * in a thread-safe way. If we try to set it here then we have | 1227 | * in a thread-safe way. If we try to set it here then we have |
| 1225 | * a race condition if two io_destroy() called simultaneously. | 1228 | * a race condition if two io_destroy() called simultaneously. |
| 1226 | */ | 1229 | */ |
| 1227 | kill_ioctx(current->mm, ioctx, &requests_done); | 1230 | ret = kill_ioctx(current->mm, ioctx, &requests_done); |
| 1228 | percpu_ref_put(&ioctx->users); | 1231 | percpu_ref_put(&ioctx->users); |
| 1229 | 1232 | ||
| 1230 | /* Wait until all IO for the context are done. Otherwise kernel | 1233 | /* Wait until all IO for the context are done. Otherwise kernel |
| 1231 | * keep using user-space buffers even if user thinks the context | 1234 | * keep using user-space buffers even if user thinks the context |
| 1232 | * is destroyed. | 1235 | * is destroyed. |
| 1233 | */ | 1236 | */ |
| 1234 | wait_for_completion(&requests_done); | 1237 | if (!ret) |
| 1238 | wait_for_completion(&requests_done); | ||
| 1235 | 1239 | ||
| 1236 | return 0; | 1240 | return ret; |
| 1237 | } | 1241 | } |
| 1238 | pr_debug("EINVAL: io_destroy: invalid context id\n"); | 1242 | pr_debug("EINVAL: io_destroy: invalid context id\n"); |
| 1239 | return -EINVAL; | 1243 | return -EINVAL; |
| @@ -1241,6 +1245,7 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx) | |||
| 1241 | 1245 | ||
| 1242 | typedef ssize_t (aio_rw_op)(struct kiocb *, const struct iovec *, | 1246 | typedef ssize_t (aio_rw_op)(struct kiocb *, const struct iovec *, |
| 1243 | unsigned long, loff_t); | 1247 | unsigned long, loff_t); |
| 1248 | typedef ssize_t (rw_iter_op)(struct kiocb *, struct iov_iter *); | ||
| 1244 | 1249 | ||
| 1245 | static ssize_t aio_setup_vectored_rw(struct kiocb *kiocb, | 1250 | static ssize_t aio_setup_vectored_rw(struct kiocb *kiocb, |
| 1246 | int rw, char __user *buf, | 1251 | int rw, char __user *buf, |
| @@ -1298,7 +1303,9 @@ static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode, | |||
| 1298 | int rw; | 1303 | int rw; |
| 1299 | fmode_t mode; | 1304 | fmode_t mode; |
| 1300 | aio_rw_op *rw_op; | 1305 | aio_rw_op *rw_op; |
| 1306 | rw_iter_op *iter_op; | ||
| 1301 | struct iovec inline_vec, *iovec = &inline_vec; | 1307 | struct iovec inline_vec, *iovec = &inline_vec; |
| 1308 | struct iov_iter iter; | ||
| 1302 | 1309 | ||
| 1303 | switch (opcode) { | 1310 | switch (opcode) { |
| 1304 | case IOCB_CMD_PREAD: | 1311 | case IOCB_CMD_PREAD: |
| @@ -1306,6 +1313,7 @@ static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode, | |||
| 1306 | mode = FMODE_READ; | 1313 | mode = FMODE_READ; |
| 1307 | rw = READ; | 1314 | rw = READ; |
| 1308 | rw_op = file->f_op->aio_read; | 1315 | rw_op = file->f_op->aio_read; |
| 1316 | iter_op = file->f_op->read_iter; | ||
| 1309 | goto rw_common; | 1317 | goto rw_common; |
| 1310 | 1318 | ||
| 1311 | case IOCB_CMD_PWRITE: | 1319 | case IOCB_CMD_PWRITE: |
| @@ -1313,12 +1321,13 @@ static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode, | |||
| 1313 | mode = FMODE_WRITE; | 1321 | mode = FMODE_WRITE; |
| 1314 | rw = WRITE; | 1322 | rw = WRITE; |
| 1315 | rw_op = file->f_op->aio_write; | 1323 | rw_op = file->f_op->aio_write; |
| 1324 | iter_op = file->f_op->write_iter; | ||
| 1316 | goto rw_common; | 1325 | goto rw_common; |
| 1317 | rw_common: | 1326 | rw_common: |
| 1318 | if (unlikely(!(file->f_mode & mode))) | 1327 | if (unlikely(!(file->f_mode & mode))) |
| 1319 | return -EBADF; | 1328 | return -EBADF; |
| 1320 | 1329 | ||
| 1321 | if (!rw_op) | 1330 | if (!rw_op && !iter_op) |
| 1322 | return -EINVAL; | 1331 | return -EINVAL; |
| 1323 | 1332 | ||
| 1324 | ret = (opcode == IOCB_CMD_PREADV || | 1333 | ret = (opcode == IOCB_CMD_PREADV || |
| @@ -1347,7 +1356,12 @@ rw_common: | |||
| 1347 | if (rw == WRITE) | 1356 | if (rw == WRITE) |
| 1348 | file_start_write(file); | 1357 | file_start_write(file); |
| 1349 | 1358 | ||
| 1350 | ret = rw_op(req, iovec, nr_segs, req->ki_pos); | 1359 | if (iter_op) { |
| 1360 | iov_iter_init(&iter, rw, iovec, nr_segs, req->ki_nbytes); | ||
| 1361 | ret = iter_op(req, &iter); | ||
| 1362 | } else { | ||
| 1363 | ret = rw_op(req, iovec, nr_segs, req->ki_pos); | ||
| 1364 | } | ||
| 1351 | 1365 | ||
| 1352 | if (rw == WRITE) | 1366 | if (rw == WRITE) |
| 1353 | file_end_write(file); | 1367 | file_end_write(file); |
| @@ -1585,7 +1599,7 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb, | |||
| 1585 | 1599 | ||
| 1586 | kiocb = lookup_kiocb(ctx, iocb, key); | 1600 | kiocb = lookup_kiocb(ctx, iocb, key); |
| 1587 | if (kiocb) | 1601 | if (kiocb) |
| 1588 | ret = kiocb_cancel(ctx, kiocb); | 1602 | ret = kiocb_cancel(kiocb); |
| 1589 | else | 1603 | else |
| 1590 | ret = -EINVAL; | 1604 | ret = -EINVAL; |
| 1591 | 1605 | ||
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index d7bd395ab586..1c55388ae633 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c | |||
| @@ -210,7 +210,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) | |||
| 210 | int pipefd; | 210 | int pipefd; |
| 211 | struct autofs_sb_info *sbi; | 211 | struct autofs_sb_info *sbi; |
| 212 | struct autofs_info *ino; | 212 | struct autofs_info *ino; |
| 213 | int pgrp; | 213 | int pgrp = 0; |
| 214 | bool pgrp_set = false; | 214 | bool pgrp_set = false; |
| 215 | int ret = -EINVAL; | 215 | int ret = -EINVAL; |
| 216 | 216 | ||
diff --git a/fs/bfs/file.c b/fs/bfs/file.c index ae2892218335..e7f88ace1a25 100644 --- a/fs/bfs/file.c +++ b/fs/bfs/file.c | |||
| @@ -23,10 +23,10 @@ | |||
| 23 | 23 | ||
| 24 | const struct file_operations bfs_file_operations = { | 24 | const struct file_operations bfs_file_operations = { |
| 25 | .llseek = generic_file_llseek, | 25 | .llseek = generic_file_llseek, |
| 26 | .read = do_sync_read, | 26 | .read = new_sync_read, |
| 27 | .aio_read = generic_file_aio_read, | 27 | .read_iter = generic_file_read_iter, |
| 28 | .write = do_sync_write, | 28 | .write = new_sync_write, |
| 29 | .aio_write = generic_file_aio_write, | 29 | .write_iter = generic_file_write_iter, |
| 30 | .mmap = generic_file_mmap, | 30 | .mmap = generic_file_mmap, |
| 31 | .splice_read = generic_file_splice_read, | 31 | .splice_read = generic_file_splice_read, |
| 32 | }; | 32 | }; |
diff --git a/fs/block_dev.c b/fs/block_dev.c index 83fba15cc394..6d7274619bf9 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c | |||
| @@ -165,14 +165,15 @@ blkdev_get_block(struct inode *inode, sector_t iblock, | |||
| 165 | } | 165 | } |
| 166 | 166 | ||
| 167 | static ssize_t | 167 | static ssize_t |
| 168 | blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, | 168 | blkdev_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, |
| 169 | loff_t offset, unsigned long nr_segs) | 169 | loff_t offset) |
| 170 | { | 170 | { |
| 171 | struct file *file = iocb->ki_filp; | 171 | struct file *file = iocb->ki_filp; |
| 172 | struct inode *inode = file->f_mapping->host; | 172 | struct inode *inode = file->f_mapping->host; |
| 173 | 173 | ||
| 174 | return __blockdev_direct_IO(rw, iocb, inode, I_BDEV(inode), iov, offset, | 174 | return __blockdev_direct_IO(rw, iocb, inode, I_BDEV(inode), iter, |
| 175 | nr_segs, blkdev_get_block, NULL, NULL, 0); | 175 | offset, blkdev_get_block, |
| 176 | NULL, NULL, 0); | ||
| 176 | } | 177 | } |
| 177 | 178 | ||
| 178 | int __sync_blockdev(struct block_device *bdev, int wait) | 179 | int __sync_blockdev(struct block_device *bdev, int wait) |
| @@ -1571,43 +1572,38 @@ static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg) | |||
| 1571 | * Does not take i_mutex for the write and thus is not for general purpose | 1572 | * Does not take i_mutex for the write and thus is not for general purpose |
| 1572 | * use. | 1573 | * use. |
| 1573 | */ | 1574 | */ |
| 1574 | ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov, | 1575 | ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from) |
| 1575 | unsigned long nr_segs, loff_t pos) | ||
| 1576 | { | 1576 | { |
| 1577 | struct file *file = iocb->ki_filp; | 1577 | struct file *file = iocb->ki_filp; |
| 1578 | struct blk_plug plug; | 1578 | struct blk_plug plug; |
| 1579 | ssize_t ret; | 1579 | ssize_t ret; |
| 1580 | 1580 | ||
| 1581 | BUG_ON(iocb->ki_pos != pos); | ||
| 1582 | |||
| 1583 | blk_start_plug(&plug); | 1581 | blk_start_plug(&plug); |
| 1584 | ret = __generic_file_aio_write(iocb, iov, nr_segs); | 1582 | ret = __generic_file_write_iter(iocb, from); |
| 1585 | if (ret > 0) { | 1583 | if (ret > 0) { |
| 1586 | ssize_t err; | 1584 | ssize_t err; |
| 1587 | 1585 | err = generic_write_sync(file, iocb->ki_pos - ret, ret); | |
| 1588 | err = generic_write_sync(file, pos, ret); | ||
| 1589 | if (err < 0) | 1586 | if (err < 0) |
| 1590 | ret = err; | 1587 | ret = err; |
| 1591 | } | 1588 | } |
| 1592 | blk_finish_plug(&plug); | 1589 | blk_finish_plug(&plug); |
| 1593 | return ret; | 1590 | return ret; |
| 1594 | } | 1591 | } |
| 1595 | EXPORT_SYMBOL_GPL(blkdev_aio_write); | 1592 | EXPORT_SYMBOL_GPL(blkdev_write_iter); |
| 1596 | 1593 | ||
| 1597 | static ssize_t blkdev_aio_read(struct kiocb *iocb, const struct iovec *iov, | 1594 | static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to) |
| 1598 | unsigned long nr_segs, loff_t pos) | ||
| 1599 | { | 1595 | { |
| 1600 | struct file *file = iocb->ki_filp; | 1596 | struct file *file = iocb->ki_filp; |
| 1601 | struct inode *bd_inode = file->f_mapping->host; | 1597 | struct inode *bd_inode = file->f_mapping->host; |
| 1602 | loff_t size = i_size_read(bd_inode); | 1598 | loff_t size = i_size_read(bd_inode); |
| 1599 | loff_t pos = iocb->ki_pos; | ||
| 1603 | 1600 | ||
| 1604 | if (pos >= size) | 1601 | if (pos >= size) |
| 1605 | return 0; | 1602 | return 0; |
| 1606 | 1603 | ||
| 1607 | size -= pos; | 1604 | size -= pos; |
| 1608 | if (size < iocb->ki_nbytes) | 1605 | iov_iter_truncate(to, size); |
| 1609 | nr_segs = iov_shorten((struct iovec *)iov, nr_segs, size); | 1606 | return generic_file_read_iter(iocb, to); |
| 1610 | return generic_file_aio_read(iocb, iov, nr_segs, pos); | ||
| 1611 | } | 1607 | } |
| 1612 | 1608 | ||
| 1613 | /* | 1609 | /* |
| @@ -1639,10 +1635,10 @@ const struct file_operations def_blk_fops = { | |||
| 1639 | .open = blkdev_open, | 1635 | .open = blkdev_open, |
| 1640 | .release = blkdev_close, | 1636 | .release = blkdev_close, |
| 1641 | .llseek = block_llseek, | 1637 | .llseek = block_llseek, |
| 1642 | .read = do_sync_read, | 1638 | .read = new_sync_read, |
| 1643 | .write = do_sync_write, | 1639 | .write = new_sync_write, |
| 1644 | .aio_read = blkdev_aio_read, | 1640 | .read_iter = blkdev_read_iter, |
| 1645 | .aio_write = blkdev_aio_write, | 1641 | .write_iter = blkdev_write_iter, |
| 1646 | .mmap = generic_file_mmap, | 1642 | .mmap = generic_file_mmap, |
| 1647 | .fsync = blkdev_fsync, | 1643 | .fsync = blkdev_fsync, |
| 1648 | .unlocked_ioctl = block_ioctl, | 1644 | .unlocked_ioctl = block_ioctl, |
| @@ -1650,7 +1646,7 @@ const struct file_operations def_blk_fops = { | |||
| 1650 | .compat_ioctl = compat_blkdev_ioctl, | 1646 | .compat_ioctl = compat_blkdev_ioctl, |
| 1651 | #endif | 1647 | #endif |
| 1652 | .splice_read = generic_file_splice_read, | 1648 | .splice_read = generic_file_splice_read, |
| 1653 | .splice_write = generic_file_splice_write, | 1649 | .splice_write = iter_file_splice_write, |
| 1654 | }; | 1650 | }; |
| 1655 | 1651 | ||
| 1656 | int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg) | 1652 | int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg) |
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 92371c414228..1daea0b47187 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c | |||
| @@ -821,7 +821,7 @@ static void free_workspace(int type, struct list_head *workspace) | |||
| 821 | 821 | ||
| 822 | spin_lock(workspace_lock); | 822 | spin_lock(workspace_lock); |
| 823 | if (*num_workspace < num_online_cpus()) { | 823 | if (*num_workspace < num_online_cpus()) { |
| 824 | list_add_tail(workspace, idle_workspace); | 824 | list_add(workspace, idle_workspace); |
| 825 | (*num_workspace)++; | 825 | (*num_workspace)++; |
| 826 | spin_unlock(workspace_lock); | 826 | spin_unlock(workspace_lock); |
| 827 | goto wake; | 827 | goto wake; |
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index b7e2c1c1ef36..be91397f4e92 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
| @@ -1259,11 +1259,19 @@ struct btrfs_block_group_cache { | |||
| 1259 | spinlock_t lock; | 1259 | spinlock_t lock; |
| 1260 | u64 pinned; | 1260 | u64 pinned; |
| 1261 | u64 reserved; | 1261 | u64 reserved; |
| 1262 | u64 delalloc_bytes; | ||
| 1262 | u64 bytes_super; | 1263 | u64 bytes_super; |
| 1263 | u64 flags; | 1264 | u64 flags; |
| 1264 | u64 sectorsize; | 1265 | u64 sectorsize; |
| 1265 | u64 cache_generation; | 1266 | u64 cache_generation; |
| 1266 | 1267 | ||
| 1268 | /* | ||
| 1269 | * It is just used for the delayed data space allocation because | ||
| 1270 | * only the data space allocation and the relative metadata update | ||
| 1271 | * can be done cross the transaction. | ||
| 1272 | */ | ||
| 1273 | struct rw_semaphore data_rwsem; | ||
| 1274 | |||
| 1267 | /* for raid56, this is a full stripe, without parity */ | 1275 | /* for raid56, this is a full stripe, without parity */ |
| 1268 | unsigned long full_stripe_len; | 1276 | unsigned long full_stripe_len; |
| 1269 | 1277 | ||
| @@ -3316,7 +3324,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, | |||
| 3316 | struct btrfs_key *ins); | 3324 | struct btrfs_key *ins); |
| 3317 | int btrfs_reserve_extent(struct btrfs_root *root, u64 num_bytes, | 3325 | int btrfs_reserve_extent(struct btrfs_root *root, u64 num_bytes, |
| 3318 | u64 min_alloc_size, u64 empty_size, u64 hint_byte, | 3326 | u64 min_alloc_size, u64 empty_size, u64 hint_byte, |
| 3319 | struct btrfs_key *ins, int is_data); | 3327 | struct btrfs_key *ins, int is_data, int delalloc); |
| 3320 | int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, | 3328 | int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
| 3321 | struct extent_buffer *buf, int full_backref, int no_quota); | 3329 | struct extent_buffer *buf, int full_backref, int no_quota); |
| 3322 | int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, | 3330 | int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
| @@ -3330,7 +3338,8 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
| 3330 | u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, | 3338 | u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, |
| 3331 | u64 owner, u64 offset, int no_quota); | 3339 | u64 owner, u64 offset, int no_quota); |
| 3332 | 3340 | ||
| 3333 | int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len); | 3341 | int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len, |
| 3342 | int delalloc); | ||
| 3334 | int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root, | 3343 | int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root, |
| 3335 | u64 start, u64 len); | 3344 | u64 start, u64 len); |
| 3336 | void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, | 3345 | void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, |
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 2af6e66fe788..eea26e1b2fda 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c | |||
| @@ -36,6 +36,7 @@ | |||
| 36 | #include "check-integrity.h" | 36 | #include "check-integrity.h" |
| 37 | #include "rcu-string.h" | 37 | #include "rcu-string.h" |
| 38 | #include "dev-replace.h" | 38 | #include "dev-replace.h" |
| 39 | #include "sysfs.h" | ||
| 39 | 40 | ||
| 40 | static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, | 41 | static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, |
| 41 | int scrub_ret); | 42 | int scrub_ret); |
| @@ -562,6 +563,10 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, | |||
| 562 | fs_info->fs_devices->latest_bdev = tgt_device->bdev; | 563 | fs_info->fs_devices->latest_bdev = tgt_device->bdev; |
| 563 | list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list); | 564 | list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list); |
| 564 | 565 | ||
| 566 | /* replace the sysfs entry */ | ||
| 567 | btrfs_kobj_rm_device(fs_info, src_device); | ||
| 568 | btrfs_kobj_add_device(fs_info, tgt_device); | ||
| 569 | |||
| 565 | btrfs_rm_dev_replace_blocked(fs_info); | 570 | btrfs_rm_dev_replace_blocked(fs_info); |
| 566 | 571 | ||
| 567 | btrfs_rm_dev_replace_srcdev(fs_info, src_device); | 572 | btrfs_rm_dev_replace_srcdev(fs_info, src_device); |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8bb4aa19898f..08e65e9cf2aa 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
| @@ -369,7 +369,8 @@ static int verify_parent_transid(struct extent_io_tree *io_tree, | |||
| 369 | out: | 369 | out: |
| 370 | unlock_extent_cached(io_tree, eb->start, eb->start + eb->len - 1, | 370 | unlock_extent_cached(io_tree, eb->start, eb->start + eb->len - 1, |
| 371 | &cached_state, GFP_NOFS); | 371 | &cached_state, GFP_NOFS); |
| 372 | btrfs_tree_read_unlock_blocking(eb); | 372 | if (need_lock) |
| 373 | btrfs_tree_read_unlock_blocking(eb); | ||
| 373 | return ret; | 374 | return ret; |
| 374 | } | 375 | } |
| 375 | 376 | ||
| @@ -2904,7 +2905,9 @@ retry_root_backup: | |||
| 2904 | if (ret) | 2905 | if (ret) |
| 2905 | goto fail_qgroup; | 2906 | goto fail_qgroup; |
| 2906 | 2907 | ||
| 2908 | mutex_lock(&fs_info->cleaner_mutex); | ||
| 2907 | ret = btrfs_recover_relocation(tree_root); | 2909 | ret = btrfs_recover_relocation(tree_root); |
| 2910 | mutex_unlock(&fs_info->cleaner_mutex); | ||
| 2908 | if (ret < 0) { | 2911 | if (ret < 0) { |
| 2909 | printk(KERN_WARNING | 2912 | printk(KERN_WARNING |
| 2910 | "BTRFS: failed to recover relocation\n"); | 2913 | "BTRFS: failed to recover relocation\n"); |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index fafb3e53ecde..813537f362f9 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
| @@ -105,7 +105,8 @@ static int find_next_key(struct btrfs_path *path, int level, | |||
| 105 | static void dump_space_info(struct btrfs_space_info *info, u64 bytes, | 105 | static void dump_space_info(struct btrfs_space_info *info, u64 bytes, |
| 106 | int dump_block_groups); | 106 | int dump_block_groups); |
| 107 | static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, | 107 | static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, |
| 108 | u64 num_bytes, int reserve); | 108 | u64 num_bytes, int reserve, |
| 109 | int delalloc); | ||
| 109 | static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv, | 110 | static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv, |
| 110 | u64 num_bytes); | 111 | u64 num_bytes); |
| 111 | int btrfs_pin_extent(struct btrfs_root *root, | 112 | int btrfs_pin_extent(struct btrfs_root *root, |
| @@ -3260,7 +3261,8 @@ again: | |||
| 3260 | 3261 | ||
| 3261 | spin_lock(&block_group->lock); | 3262 | spin_lock(&block_group->lock); |
| 3262 | if (block_group->cached != BTRFS_CACHE_FINISHED || | 3263 | if (block_group->cached != BTRFS_CACHE_FINISHED || |
| 3263 | !btrfs_test_opt(root, SPACE_CACHE)) { | 3264 | !btrfs_test_opt(root, SPACE_CACHE) || |
| 3265 | block_group->delalloc_bytes) { | ||
| 3264 | /* | 3266 | /* |
| 3265 | * don't bother trying to write stuff out _if_ | 3267 | * don't bother trying to write stuff out _if_ |
| 3266 | * a) we're not cached, | 3268 | * a) we're not cached, |
| @@ -5613,6 +5615,7 @@ int btrfs_exclude_logged_extents(struct btrfs_root *log, | |||
| 5613 | * @cache: The cache we are manipulating | 5615 | * @cache: The cache we are manipulating |
| 5614 | * @num_bytes: The number of bytes in question | 5616 | * @num_bytes: The number of bytes in question |
| 5615 | * @reserve: One of the reservation enums | 5617 | * @reserve: One of the reservation enums |
| 5618 | * @delalloc: The blocks are allocated for the delalloc write | ||
| 5616 | * | 5619 | * |
| 5617 | * This is called by the allocator when it reserves space, or by somebody who is | 5620 | * This is called by the allocator when it reserves space, or by somebody who is |
| 5618 | * freeing space that was never actually used on disk. For example if you | 5621 | * freeing space that was never actually used on disk. For example if you |
| @@ -5631,7 +5634,7 @@ int btrfs_exclude_logged_extents(struct btrfs_root *log, | |||
| 5631 | * succeeds. | 5634 | * succeeds. |
| 5632 | */ | 5635 | */ |
| 5633 | static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, | 5636 | static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, |
| 5634 | u64 num_bytes, int reserve) | 5637 | u64 num_bytes, int reserve, int delalloc) |
| 5635 | { | 5638 | { |
| 5636 | struct btrfs_space_info *space_info = cache->space_info; | 5639 | struct btrfs_space_info *space_info = cache->space_info; |
| 5637 | int ret = 0; | 5640 | int ret = 0; |
| @@ -5650,12 +5653,18 @@ static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, | |||
| 5650 | num_bytes, 0); | 5653 | num_bytes, 0); |
| 5651 | space_info->bytes_may_use -= num_bytes; | 5654 | space_info->bytes_may_use -= num_bytes; |
| 5652 | } | 5655 | } |
| 5656 | |||
| 5657 | if (delalloc) | ||
| 5658 | cache->delalloc_bytes += num_bytes; | ||
| 5653 | } | 5659 | } |
| 5654 | } else { | 5660 | } else { |
| 5655 | if (cache->ro) | 5661 | if (cache->ro) |
| 5656 | space_info->bytes_readonly += num_bytes; | 5662 | space_info->bytes_readonly += num_bytes; |
| 5657 | cache->reserved -= num_bytes; | 5663 | cache->reserved -= num_bytes; |
| 5658 | space_info->bytes_reserved -= num_bytes; | 5664 | space_info->bytes_reserved -= num_bytes; |
| 5665 | |||
| 5666 | if (delalloc) | ||
| 5667 | cache->delalloc_bytes -= num_bytes; | ||
| 5659 | } | 5668 | } |
| 5660 | spin_unlock(&cache->lock); | 5669 | spin_unlock(&cache->lock); |
| 5661 | spin_unlock(&space_info->lock); | 5670 | spin_unlock(&space_info->lock); |
| @@ -5669,7 +5678,6 @@ void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, | |||
| 5669 | struct btrfs_caching_control *next; | 5678 | struct btrfs_caching_control *next; |
| 5670 | struct btrfs_caching_control *caching_ctl; | 5679 | struct btrfs_caching_control *caching_ctl; |
| 5671 | struct btrfs_block_group_cache *cache; | 5680 | struct btrfs_block_group_cache *cache; |
| 5672 | struct btrfs_space_info *space_info; | ||
| 5673 | 5681 | ||
| 5674 | down_write(&fs_info->commit_root_sem); | 5682 | down_write(&fs_info->commit_root_sem); |
| 5675 | 5683 | ||
| @@ -5692,9 +5700,6 @@ void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, | |||
| 5692 | 5700 | ||
| 5693 | up_write(&fs_info->commit_root_sem); | 5701 | up_write(&fs_info->commit_root_sem); |
| 5694 | 5702 | ||
| 5695 | list_for_each_entry_rcu(space_info, &fs_info->space_info, list) | ||
| 5696 | percpu_counter_set(&space_info->total_bytes_pinned, 0); | ||
| 5697 | |||
| 5698 | update_global_block_rsv(fs_info); | 5703 | update_global_block_rsv(fs_info); |
| 5699 | } | 5704 | } |
| 5700 | 5705 | ||
| @@ -5732,6 +5737,7 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) | |||
| 5732 | spin_lock(&cache->lock); | 5737 | spin_lock(&cache->lock); |
| 5733 | cache->pinned -= len; | 5738 | cache->pinned -= len; |
| 5734 | space_info->bytes_pinned -= len; | 5739 | space_info->bytes_pinned -= len; |
| 5740 | percpu_counter_add(&space_info->total_bytes_pinned, -len); | ||
| 5735 | if (cache->ro) { | 5741 | if (cache->ro) { |
| 5736 | space_info->bytes_readonly += len; | 5742 | space_info->bytes_readonly += len; |
| 5737 | readonly = true; | 5743 | readonly = true; |
| @@ -6206,7 +6212,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, | |||
| 6206 | WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)); | 6212 | WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)); |
| 6207 | 6213 | ||
| 6208 | btrfs_add_free_space(cache, buf->start, buf->len); | 6214 | btrfs_add_free_space(cache, buf->start, buf->len); |
| 6209 | btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE); | 6215 | btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE, 0); |
| 6210 | trace_btrfs_reserved_extent_free(root, buf->start, buf->len); | 6216 | trace_btrfs_reserved_extent_free(root, buf->start, buf->len); |
| 6211 | pin = 0; | 6217 | pin = 0; |
| 6212 | } | 6218 | } |
| @@ -6365,6 +6371,70 @@ enum btrfs_loop_type { | |||
| 6365 | LOOP_NO_EMPTY_SIZE = 3, | 6371 | LOOP_NO_EMPTY_SIZE = 3, |
| 6366 | }; | 6372 | }; |
| 6367 | 6373 | ||
| 6374 | static inline void | ||
| 6375 | btrfs_lock_block_group(struct btrfs_block_group_cache *cache, | ||
| 6376 | int delalloc) | ||
| 6377 | { | ||
| 6378 | if (delalloc) | ||
| 6379 | down_read(&cache->data_rwsem); | ||
| 6380 | } | ||
| 6381 | |||
| 6382 | static inline void | ||
| 6383 | btrfs_grab_block_group(struct btrfs_block_group_cache *cache, | ||
| 6384 | int delalloc) | ||
| 6385 | { | ||
| 6386 | btrfs_get_block_group(cache); | ||
| 6387 | if (delalloc) | ||
| 6388 | down_read(&cache->data_rwsem); | ||
| 6389 | } | ||
| 6390 | |||
| 6391 | static struct btrfs_block_group_cache * | ||
| 6392 | btrfs_lock_cluster(struct btrfs_block_group_cache *block_group, | ||
| 6393 | struct btrfs_free_cluster *cluster, | ||
| 6394 | int delalloc) | ||
| 6395 | { | ||
| 6396 | struct btrfs_block_group_cache *used_bg; | ||
| 6397 | bool locked = false; | ||
| 6398 | again: | ||
| 6399 | spin_lock(&cluster->refill_lock); | ||
| 6400 | if (locked) { | ||
| 6401 | if (used_bg == cluster->block_group) | ||
| 6402 | return used_bg; | ||
| 6403 | |||
| 6404 | up_read(&used_bg->data_rwsem); | ||
| 6405 | btrfs_put_block_group(used_bg); | ||
| 6406 | } | ||
| 6407 | |||
| 6408 | used_bg = cluster->block_group; | ||
| 6409 | if (!used_bg) | ||
| 6410 | return NULL; | ||
| 6411 | |||
| 6412 | if (used_bg == block_group) | ||
| 6413 | return used_bg; | ||
| 6414 | |||
| 6415 | btrfs_get_block_group(used_bg); | ||
| 6416 | |||
| 6417 | if (!delalloc) | ||
| 6418 | return used_bg; | ||
| 6419 | |||
| 6420 | if (down_read_trylock(&used_bg->data_rwsem)) | ||
| 6421 | return used_bg; | ||
| 6422 | |||
| 6423 | spin_unlock(&cluster->refill_lock); | ||
| 6424 | down_read(&used_bg->data_rwsem); | ||
| 6425 | locked = true; | ||
| 6426 | goto again; | ||
| 6427 | } | ||
| 6428 | |||
| 6429 | static inline void | ||
| 6430 | btrfs_release_block_group(struct btrfs_block_group_cache *cache, | ||
| 6431 | int delalloc) | ||
| 6432 | { | ||
| 6433 | if (delalloc) | ||
| 6434 | up_read(&cache->data_rwsem); | ||
| 6435 | btrfs_put_block_group(cache); | ||
| 6436 | } | ||
| 6437 | |||
| 6368 | /* | 6438 | /* |
| 6369 | * walks the btree of allocated extents and find a hole of a given size. | 6439 | * walks the btree of allocated extents and find a hole of a given size. |
| 6370 | * The key ins is changed to record the hole: | 6440 | * The key ins is changed to record the hole: |
| @@ -6379,7 +6449,7 @@ enum btrfs_loop_type { | |||
| 6379 | static noinline int find_free_extent(struct btrfs_root *orig_root, | 6449 | static noinline int find_free_extent(struct btrfs_root *orig_root, |
| 6380 | u64 num_bytes, u64 empty_size, | 6450 | u64 num_bytes, u64 empty_size, |
| 6381 | u64 hint_byte, struct btrfs_key *ins, | 6451 | u64 hint_byte, struct btrfs_key *ins, |
| 6382 | u64 flags) | 6452 | u64 flags, int delalloc) |
| 6383 | { | 6453 | { |
| 6384 | int ret = 0; | 6454 | int ret = 0; |
| 6385 | struct btrfs_root *root = orig_root->fs_info->extent_root; | 6455 | struct btrfs_root *root = orig_root->fs_info->extent_root; |
| @@ -6467,6 +6537,7 @@ static noinline int find_free_extent(struct btrfs_root *orig_root, | |||
| 6467 | up_read(&space_info->groups_sem); | 6537 | up_read(&space_info->groups_sem); |
| 6468 | } else { | 6538 | } else { |
| 6469 | index = get_block_group_index(block_group); | 6539 | index = get_block_group_index(block_group); |
| 6540 | btrfs_lock_block_group(block_group, delalloc); | ||
| 6470 | goto have_block_group; | 6541 | goto have_block_group; |
| 6471 | } | 6542 | } |
| 6472 | } else if (block_group) { | 6543 | } else if (block_group) { |
| @@ -6481,7 +6552,7 @@ search: | |||
| 6481 | u64 offset; | 6552 | u64 offset; |
| 6482 | int cached; | 6553 | int cached; |
| 6483 | 6554 | ||
| 6484 | btrfs_get_block_group(block_group); | 6555 | btrfs_grab_block_group(block_group, delalloc); |
| 6485 | search_start = block_group->key.objectid; | 6556 | search_start = block_group->key.objectid; |
| 6486 | 6557 | ||
| 6487 | /* | 6558 | /* |
| @@ -6529,16 +6600,16 @@ have_block_group: | |||
| 6529 | * the refill lock keeps out other | 6600 | * the refill lock keeps out other |
| 6530 | * people trying to start a new cluster | 6601 | * people trying to start a new cluster |
| 6531 | */ | 6602 | */ |
| 6532 | spin_lock(&last_ptr->refill_lock); | 6603 | used_block_group = btrfs_lock_cluster(block_group, |
| 6533 | used_block_group = last_ptr->block_group; | 6604 | last_ptr, |
| 6534 | if (used_block_group != block_group && | 6605 | delalloc); |
| 6535 | (!used_block_group || | 6606 | if (!used_block_group) |
| 6536 | used_block_group->ro || | ||
| 6537 | !block_group_bits(used_block_group, flags))) | ||
| 6538 | goto refill_cluster; | 6607 | goto refill_cluster; |
| 6539 | 6608 | ||
| 6540 | if (used_block_group != block_group) | 6609 | if (used_block_group != block_group && |
| 6541 | btrfs_get_block_group(used_block_group); | 6610 | (used_block_group->ro || |
| 6611 | !block_group_bits(used_block_group, flags))) | ||
| 6612 | goto release_cluster; | ||
| 6542 | 6613 | ||
| 6543 | offset = btrfs_alloc_from_cluster(used_block_group, | 6614 | offset = btrfs_alloc_from_cluster(used_block_group, |
| 6544 | last_ptr, | 6615 | last_ptr, |
| @@ -6552,16 +6623,15 @@ have_block_group: | |||
| 6552 | used_block_group, | 6623 | used_block_group, |
| 6553 | search_start, num_bytes); | 6624 | search_start, num_bytes); |
| 6554 | if (used_block_group != block_group) { | 6625 | if (used_block_group != block_group) { |
| 6555 | btrfs_put_block_group(block_group); | 6626 | btrfs_release_block_group(block_group, |
| 6627 | delalloc); | ||
| 6556 | block_group = used_block_group; | 6628 | block_group = used_block_group; |
| 6557 | } | 6629 | } |
| 6558 | goto checks; | 6630 | goto checks; |
| 6559 | } | 6631 | } |
| 6560 | 6632 | ||
| 6561 | WARN_ON(last_ptr->block_group != used_block_group); | 6633 | WARN_ON(last_ptr->block_group != used_block_group); |
| 6562 | if (used_block_group != block_group) | 6634 | release_cluster: |
| 6563 | btrfs_put_block_group(used_block_group); | ||
| 6564 | refill_cluster: | ||
| 6565 | /* If we are on LOOP_NO_EMPTY_SIZE, we can't | 6635 | /* If we are on LOOP_NO_EMPTY_SIZE, we can't |
| 6566 | * set up a new clusters, so lets just skip it | 6636 | * set up a new clusters, so lets just skip it |
| 6567 | * and let the allocator find whatever block | 6637 | * and let the allocator find whatever block |
| @@ -6578,8 +6648,10 @@ refill_cluster: | |||
| 6578 | * succeeding in the unclustered | 6648 | * succeeding in the unclustered |
| 6579 | * allocation. */ | 6649 | * allocation. */ |
| 6580 | if (loop >= LOOP_NO_EMPTY_SIZE && | 6650 | if (loop >= LOOP_NO_EMPTY_SIZE && |
| 6581 | last_ptr->block_group != block_group) { | 6651 | used_block_group != block_group) { |
| 6582 | spin_unlock(&last_ptr->refill_lock); | 6652 | spin_unlock(&last_ptr->refill_lock); |
| 6653 | btrfs_release_block_group(used_block_group, | ||
| 6654 | delalloc); | ||
| 6583 | goto unclustered_alloc; | 6655 | goto unclustered_alloc; |
| 6584 | } | 6656 | } |
| 6585 | 6657 | ||
| @@ -6589,6 +6661,10 @@ refill_cluster: | |||
| 6589 | */ | 6661 | */ |
| 6590 | btrfs_return_cluster_to_free_space(NULL, last_ptr); | 6662 | btrfs_return_cluster_to_free_space(NULL, last_ptr); |
| 6591 | 6663 | ||
| 6664 | if (used_block_group != block_group) | ||
| 6665 | btrfs_release_block_group(used_block_group, | ||
| 6666 | delalloc); | ||
| 6667 | refill_cluster: | ||
| 6592 | if (loop >= LOOP_NO_EMPTY_SIZE) { | 6668 | if (loop >= LOOP_NO_EMPTY_SIZE) { |
| 6593 | spin_unlock(&last_ptr->refill_lock); | 6669 | spin_unlock(&last_ptr->refill_lock); |
| 6594 | goto unclustered_alloc; | 6670 | goto unclustered_alloc; |
| @@ -6696,7 +6772,7 @@ checks: | |||
| 6696 | BUG_ON(offset > search_start); | 6772 | BUG_ON(offset > search_start); |
| 6697 | 6773 | ||
| 6698 | ret = btrfs_update_reserved_bytes(block_group, num_bytes, | 6774 | ret = btrfs_update_reserved_bytes(block_group, num_bytes, |
| 6699 | alloc_type); | 6775 | alloc_type, delalloc); |
| 6700 | if (ret == -EAGAIN) { | 6776 | if (ret == -EAGAIN) { |
| 6701 | btrfs_add_free_space(block_group, offset, num_bytes); | 6777 | btrfs_add_free_space(block_group, offset, num_bytes); |
| 6702 | goto loop; | 6778 | goto loop; |
| @@ -6708,13 +6784,13 @@ checks: | |||
| 6708 | 6784 | ||
| 6709 | trace_btrfs_reserve_extent(orig_root, block_group, | 6785 | trace_btrfs_reserve_extent(orig_root, block_group, |
| 6710 | search_start, num_bytes); | 6786 | search_start, num_bytes); |
| 6711 | btrfs_put_block_group(block_group); | 6787 | btrfs_release_block_group(block_group, delalloc); |
| 6712 | break; | 6788 | break; |
| 6713 | loop: | 6789 | loop: |
| 6714 | failed_cluster_refill = false; | 6790 | failed_cluster_refill = false; |
| 6715 | failed_alloc = false; | 6791 | failed_alloc = false; |
| 6716 | BUG_ON(index != get_block_group_index(block_group)); | 6792 | BUG_ON(index != get_block_group_index(block_group)); |
| 6717 | btrfs_put_block_group(block_group); | 6793 | btrfs_release_block_group(block_group, delalloc); |
| 6718 | } | 6794 | } |
| 6719 | up_read(&space_info->groups_sem); | 6795 | up_read(&space_info->groups_sem); |
| 6720 | 6796 | ||
| @@ -6827,7 +6903,7 @@ again: | |||
| 6827 | int btrfs_reserve_extent(struct btrfs_root *root, | 6903 | int btrfs_reserve_extent(struct btrfs_root *root, |
| 6828 | u64 num_bytes, u64 min_alloc_size, | 6904 | u64 num_bytes, u64 min_alloc_size, |
| 6829 | u64 empty_size, u64 hint_byte, | 6905 | u64 empty_size, u64 hint_byte, |
| 6830 | struct btrfs_key *ins, int is_data) | 6906 | struct btrfs_key *ins, int is_data, int delalloc) |
| 6831 | { | 6907 | { |
| 6832 | bool final_tried = false; | 6908 | bool final_tried = false; |
| 6833 | u64 flags; | 6909 | u64 flags; |
| @@ -6837,7 +6913,7 @@ int btrfs_reserve_extent(struct btrfs_root *root, | |||
| 6837 | again: | 6913 | again: |
| 6838 | WARN_ON(num_bytes < root->sectorsize); | 6914 | WARN_ON(num_bytes < root->sectorsize); |
| 6839 | ret = find_free_extent(root, num_bytes, empty_size, hint_byte, ins, | 6915 | ret = find_free_extent(root, num_bytes, empty_size, hint_byte, ins, |
| 6840 | flags); | 6916 | flags, delalloc); |
| 6841 | 6917 | ||
| 6842 | if (ret == -ENOSPC) { | 6918 | if (ret == -ENOSPC) { |
| 6843 | if (!final_tried && ins->offset) { | 6919 | if (!final_tried && ins->offset) { |
| @@ -6862,7 +6938,8 @@ again: | |||
| 6862 | } | 6938 | } |
| 6863 | 6939 | ||
| 6864 | static int __btrfs_free_reserved_extent(struct btrfs_root *root, | 6940 | static int __btrfs_free_reserved_extent(struct btrfs_root *root, |
| 6865 | u64 start, u64 len, int pin) | 6941 | u64 start, u64 len, |
| 6942 | int pin, int delalloc) | ||
| 6866 | { | 6943 | { |
| 6867 | struct btrfs_block_group_cache *cache; | 6944 | struct btrfs_block_group_cache *cache; |
| 6868 | int ret = 0; | 6945 | int ret = 0; |
| @@ -6881,7 +6958,7 @@ static int __btrfs_free_reserved_extent(struct btrfs_root *root, | |||
| 6881 | pin_down_extent(root, cache, start, len, 1); | 6958 | pin_down_extent(root, cache, start, len, 1); |
| 6882 | else { | 6959 | else { |
| 6883 | btrfs_add_free_space(cache, start, len); | 6960 | btrfs_add_free_space(cache, start, len); |
| 6884 | btrfs_update_reserved_bytes(cache, len, RESERVE_FREE); | 6961 | btrfs_update_reserved_bytes(cache, len, RESERVE_FREE, delalloc); |
| 6885 | } | 6962 | } |
| 6886 | btrfs_put_block_group(cache); | 6963 | btrfs_put_block_group(cache); |
| 6887 | 6964 | ||
| @@ -6891,15 +6968,15 @@ static int __btrfs_free_reserved_extent(struct btrfs_root *root, | |||
| 6891 | } | 6968 | } |
| 6892 | 6969 | ||
| 6893 | int btrfs_free_reserved_extent(struct btrfs_root *root, | 6970 | int btrfs_free_reserved_extent(struct btrfs_root *root, |
| 6894 | u64 start, u64 len) | 6971 | u64 start, u64 len, int delalloc) |
| 6895 | { | 6972 | { |
| 6896 | return __btrfs_free_reserved_extent(root, start, len, 0); | 6973 | return __btrfs_free_reserved_extent(root, start, len, 0, delalloc); |
| 6897 | } | 6974 | } |
| 6898 | 6975 | ||
| 6899 | int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root, | 6976 | int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root, |
| 6900 | u64 start, u64 len) | 6977 | u64 start, u64 len) |
| 6901 | { | 6978 | { |
| 6902 | return __btrfs_free_reserved_extent(root, start, len, 1); | 6979 | return __btrfs_free_reserved_extent(root, start, len, 1, 0); |
| 6903 | } | 6980 | } |
| 6904 | 6981 | ||
| 6905 | static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, | 6982 | static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, |
| @@ -7114,7 +7191,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, | |||
| 7114 | return -EINVAL; | 7191 | return -EINVAL; |
| 7115 | 7192 | ||
| 7116 | ret = btrfs_update_reserved_bytes(block_group, ins->offset, | 7193 | ret = btrfs_update_reserved_bytes(block_group, ins->offset, |
| 7117 | RESERVE_ALLOC_NO_ACCOUNT); | 7194 | RESERVE_ALLOC_NO_ACCOUNT, 0); |
| 7118 | BUG_ON(ret); /* logic error */ | 7195 | BUG_ON(ret); /* logic error */ |
| 7119 | ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, | 7196 | ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, |
| 7120 | 0, owner, offset, ins, 1); | 7197 | 0, owner, offset, ins, 1); |
| @@ -7256,7 +7333,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, | |||
| 7256 | return ERR_CAST(block_rsv); | 7333 | return ERR_CAST(block_rsv); |
| 7257 | 7334 | ||
| 7258 | ret = btrfs_reserve_extent(root, blocksize, blocksize, | 7335 | ret = btrfs_reserve_extent(root, blocksize, blocksize, |
| 7259 | empty_size, hint, &ins, 0); | 7336 | empty_size, hint, &ins, 0, 0); |
| 7260 | if (ret) { | 7337 | if (ret) { |
| 7261 | unuse_block_rsv(root->fs_info, block_rsv, blocksize); | 7338 | unuse_block_rsv(root->fs_info, block_rsv, blocksize); |
| 7262 | return ERR_PTR(ret); | 7339 | return ERR_PTR(ret); |
| @@ -8659,6 +8736,7 @@ btrfs_create_block_group_cache(struct btrfs_root *root, u64 start, u64 size) | |||
| 8659 | start); | 8736 | start); |
| 8660 | atomic_set(&cache->count, 1); | 8737 | atomic_set(&cache->count, 1); |
| 8661 | spin_lock_init(&cache->lock); | 8738 | spin_lock_init(&cache->lock); |
| 8739 | init_rwsem(&cache->data_rwsem); | ||
| 8662 | INIT_LIST_HEAD(&cache->list); | 8740 | INIT_LIST_HEAD(&cache->list); |
| 8663 | INIT_LIST_HEAD(&cache->cluster_list); | 8741 | INIT_LIST_HEAD(&cache->cluster_list); |
| 8664 | INIT_LIST_HEAD(&cache->new_bg_list); | 8742 | INIT_LIST_HEAD(&cache->new_bg_list); |
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index f25a9092b946..a389820d158b 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
| @@ -2354,7 +2354,7 @@ int end_extent_writepage(struct page *page, int err, u64 start, u64 end) | |||
| 2354 | { | 2354 | { |
| 2355 | int uptodate = (err == 0); | 2355 | int uptodate = (err == 0); |
| 2356 | struct extent_io_tree *tree; | 2356 | struct extent_io_tree *tree; |
| 2357 | int ret; | 2357 | int ret = 0; |
| 2358 | 2358 | ||
| 2359 | tree = &BTRFS_I(page->mapping->host)->io_tree; | 2359 | tree = &BTRFS_I(page->mapping->host)->io_tree; |
| 2360 | 2360 | ||
| @@ -5068,6 +5068,43 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv, | |||
| 5068 | } | 5068 | } |
| 5069 | } | 5069 | } |
| 5070 | 5070 | ||
| 5071 | int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dstv, | ||
| 5072 | unsigned long start, | ||
| 5073 | unsigned long len) | ||
| 5074 | { | ||
| 5075 | size_t cur; | ||
| 5076 | size_t offset; | ||
| 5077 | struct page *page; | ||
| 5078 | char *kaddr; | ||
| 5079 | char __user *dst = (char __user *)dstv; | ||
| 5080 | size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); | ||
| 5081 | unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; | ||
| 5082 | int ret = 0; | ||
| 5083 | |||
| 5084 | WARN_ON(start > eb->len); | ||
| 5085 | WARN_ON(start + len > eb->start + eb->len); | ||
| 5086 | |||
| 5087 | offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1); | ||
| 5088 | |||
| 5089 | while (len > 0) { | ||
| 5090 | page = extent_buffer_page(eb, i); | ||
| 5091 | |||
| 5092 | cur = min(len, (PAGE_CACHE_SIZE - offset)); | ||
| 5093 | kaddr = page_address(page); | ||
| 5094 | if (copy_to_user(dst, kaddr + offset, cur)) { | ||
| 5095 | ret = -EFAULT; | ||
| 5096 | break; | ||
| 5097 | } | ||
| 5098 | |||
| 5099 | dst += cur; | ||
| 5100 | len -= cur; | ||
| 5101 | offset = 0; | ||
| 5102 | i++; | ||
| 5103 | } | ||
| 5104 | |||
| 5105 | return ret; | ||
| 5106 | } | ||
| 5107 | |||
| 5071 | int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, | 5108 | int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, |
| 5072 | unsigned long min_len, char **map, | 5109 | unsigned long min_len, char **map, |
| 5073 | unsigned long *map_start, | 5110 | unsigned long *map_start, |
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 8b63f2d46518..ccc264e7bde1 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h | |||
| @@ -158,7 +158,6 @@ struct extent_buffer { | |||
| 158 | * to unlock | 158 | * to unlock |
| 159 | */ | 159 | */ |
| 160 | wait_queue_head_t read_lock_wq; | 160 | wait_queue_head_t read_lock_wq; |
| 161 | wait_queue_head_t lock_wq; | ||
| 162 | struct page *pages[INLINE_EXTENT_BUFFER_PAGES]; | 161 | struct page *pages[INLINE_EXTENT_BUFFER_PAGES]; |
| 163 | #ifdef CONFIG_BTRFS_DEBUG | 162 | #ifdef CONFIG_BTRFS_DEBUG |
| 164 | struct list_head leak_list; | 163 | struct list_head leak_list; |
| @@ -304,6 +303,9 @@ int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, | |||
| 304 | void read_extent_buffer(struct extent_buffer *eb, void *dst, | 303 | void read_extent_buffer(struct extent_buffer *eb, void *dst, |
| 305 | unsigned long start, | 304 | unsigned long start, |
| 306 | unsigned long len); | 305 | unsigned long len); |
| 306 | int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dst, | ||
| 307 | unsigned long start, | ||
| 308 | unsigned long len); | ||
| 307 | void write_extent_buffer(struct extent_buffer *eb, const void *src, | 309 | void write_extent_buffer(struct extent_buffer *eb, const void *src, |
| 308 | unsigned long start, unsigned long len); | 310 | unsigned long start, unsigned long len); |
| 309 | void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, | 311 | void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, |
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 1874aee69c86..225302b39afb 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c | |||
| @@ -75,6 +75,8 @@ void free_extent_map(struct extent_map *em) | |||
| 75 | if (atomic_dec_and_test(&em->refs)) { | 75 | if (atomic_dec_and_test(&em->refs)) { |
| 76 | WARN_ON(extent_map_in_tree(em)); | 76 | WARN_ON(extent_map_in_tree(em)); |
| 77 | WARN_ON(!list_empty(&em->list)); | 77 | WARN_ON(!list_empty(&em->list)); |
| 78 | if (test_bit(EXTENT_FLAG_FS_MAPPING, &em->flags)) | ||
| 79 | kfree(em->bdev); | ||
| 78 | kmem_cache_free(extent_map_cache, em); | 80 | kmem_cache_free(extent_map_cache, em); |
| 79 | } | 81 | } |
| 80 | } | 82 | } |
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index e7fd8a56a140..b2991fd8583e 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h | |||
| @@ -15,6 +15,7 @@ | |||
| 15 | #define EXTENT_FLAG_PREALLOC 3 /* pre-allocated extent */ | 15 | #define EXTENT_FLAG_PREALLOC 3 /* pre-allocated extent */ |
| 16 | #define EXTENT_FLAG_LOGGING 4 /* Logging this extent */ | 16 | #define EXTENT_FLAG_LOGGING 4 /* Logging this extent */ |
| 17 | #define EXTENT_FLAG_FILLING 5 /* Filling in a preallocated extent */ | 17 | #define EXTENT_FLAG_FILLING 5 /* Filling in a preallocated extent */ |
| 18 | #define EXTENT_FLAG_FS_MAPPING 6 /* filesystem extent mapping type */ | ||
| 18 | 19 | ||
| 19 | struct extent_map { | 20 | struct extent_map { |
| 20 | struct rb_node rb_node; | 21 | struct rb_node rb_node; |
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index e472441feb5d..1f2b99cb55ea 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
| @@ -448,7 +448,7 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages, | |||
| 448 | write_bytes -= copied; | 448 | write_bytes -= copied; |
| 449 | total_copied += copied; | 449 | total_copied += copied; |
| 450 | 450 | ||
| 451 | /* Return to btrfs_file_aio_write to fault page */ | 451 | /* Return to btrfs_file_write_iter to fault page */ |
| 452 | if (unlikely(copied == 0)) | 452 | if (unlikely(copied == 0)) |
| 453 | break; | 453 | break; |
| 454 | 454 | ||
| @@ -1675,27 +1675,22 @@ again: | |||
| 1675 | } | 1675 | } |
| 1676 | 1676 | ||
| 1677 | static ssize_t __btrfs_direct_write(struct kiocb *iocb, | 1677 | static ssize_t __btrfs_direct_write(struct kiocb *iocb, |
| 1678 | const struct iovec *iov, | 1678 | struct iov_iter *from, |
| 1679 | unsigned long nr_segs, loff_t pos, | 1679 | loff_t pos) |
| 1680 | size_t count, size_t ocount) | ||
| 1681 | { | 1680 | { |
| 1682 | struct file *file = iocb->ki_filp; | 1681 | struct file *file = iocb->ki_filp; |
| 1683 | struct iov_iter i; | ||
| 1684 | ssize_t written; | 1682 | ssize_t written; |
| 1685 | ssize_t written_buffered; | 1683 | ssize_t written_buffered; |
| 1686 | loff_t endbyte; | 1684 | loff_t endbyte; |
| 1687 | int err; | 1685 | int err; |
| 1688 | 1686 | ||
| 1689 | written = generic_file_direct_write(iocb, iov, &nr_segs, pos, | 1687 | written = generic_file_direct_write(iocb, from, pos); |
| 1690 | count, ocount); | ||
| 1691 | 1688 | ||
| 1692 | if (written < 0 || written == count) | 1689 | if (written < 0 || !iov_iter_count(from)) |
| 1693 | return written; | 1690 | return written; |
| 1694 | 1691 | ||
| 1695 | pos += written; | 1692 | pos += written; |
| 1696 | count -= written; | 1693 | written_buffered = __btrfs_buffered_write(file, from, pos); |
| 1697 | iov_iter_init(&i, iov, nr_segs, count, written); | ||
| 1698 | written_buffered = __btrfs_buffered_write(file, &i, pos); | ||
| 1699 | if (written_buffered < 0) { | 1694 | if (written_buffered < 0) { |
| 1700 | err = written_buffered; | 1695 | err = written_buffered; |
| 1701 | goto out; | 1696 | goto out; |
| @@ -1730,9 +1725,8 @@ static void update_time_for_write(struct inode *inode) | |||
| 1730 | inode_inc_iversion(inode); | 1725 | inode_inc_iversion(inode); |
| 1731 | } | 1726 | } |
| 1732 | 1727 | ||
| 1733 | static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | 1728 | static ssize_t btrfs_file_write_iter(struct kiocb *iocb, |
| 1734 | const struct iovec *iov, | 1729 | struct iov_iter *from) |
| 1735 | unsigned long nr_segs, loff_t pos) | ||
| 1736 | { | 1730 | { |
| 1737 | struct file *file = iocb->ki_filp; | 1731 | struct file *file = iocb->ki_filp; |
| 1738 | struct inode *inode = file_inode(file); | 1732 | struct inode *inode = file_inode(file); |
| @@ -1741,18 +1735,12 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | |||
| 1741 | u64 end_pos; | 1735 | u64 end_pos; |
| 1742 | ssize_t num_written = 0; | 1736 | ssize_t num_written = 0; |
| 1743 | ssize_t err = 0; | 1737 | ssize_t err = 0; |
| 1744 | size_t count, ocount; | 1738 | size_t count = iov_iter_count(from); |
| 1745 | bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host); | 1739 | bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host); |
| 1740 | loff_t pos = iocb->ki_pos; | ||
| 1746 | 1741 | ||
| 1747 | mutex_lock(&inode->i_mutex); | 1742 | mutex_lock(&inode->i_mutex); |
| 1748 | 1743 | ||
| 1749 | err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ); | ||
| 1750 | if (err) { | ||
| 1751 | mutex_unlock(&inode->i_mutex); | ||
| 1752 | goto out; | ||
| 1753 | } | ||
| 1754 | count = ocount; | ||
| 1755 | |||
| 1756 | current->backing_dev_info = inode->i_mapping->backing_dev_info; | 1744 | current->backing_dev_info = inode->i_mapping->backing_dev_info; |
| 1757 | err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); | 1745 | err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); |
| 1758 | if (err) { | 1746 | if (err) { |
| @@ -1765,6 +1753,8 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | |||
| 1765 | goto out; | 1753 | goto out; |
| 1766 | } | 1754 | } |
| 1767 | 1755 | ||
| 1756 | iov_iter_truncate(from, count); | ||
| 1757 | |||
| 1768 | err = file_remove_suid(file); | 1758 | err = file_remove_suid(file); |
| 1769 | if (err) { | 1759 | if (err) { |
| 1770 | mutex_unlock(&inode->i_mutex); | 1760 | mutex_unlock(&inode->i_mutex); |
| @@ -1806,14 +1796,9 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | |||
| 1806 | atomic_inc(&BTRFS_I(inode)->sync_writers); | 1796 | atomic_inc(&BTRFS_I(inode)->sync_writers); |
| 1807 | 1797 | ||
| 1808 | if (unlikely(file->f_flags & O_DIRECT)) { | 1798 | if (unlikely(file->f_flags & O_DIRECT)) { |
| 1809 | num_written = __btrfs_direct_write(iocb, iov, nr_segs, | 1799 | num_written = __btrfs_direct_write(iocb, from, pos); |
| 1810 | pos, count, ocount); | ||
| 1811 | } else { | 1800 | } else { |
| 1812 | struct iov_iter i; | 1801 | num_written = __btrfs_buffered_write(file, from, pos); |
| 1813 | |||
| 1814 | iov_iter_init(&i, iov, nr_segs, count, num_written); | ||
| 1815 | |||
| 1816 | num_written = __btrfs_buffered_write(file, &i, pos); | ||
| 1817 | if (num_written > 0) | 1802 | if (num_written > 0) |
| 1818 | iocb->ki_pos = pos + num_written; | 1803 | iocb->ki_pos = pos + num_written; |
| 1819 | } | 1804 | } |
| @@ -2740,11 +2725,11 @@ out: | |||
| 2740 | 2725 | ||
| 2741 | const struct file_operations btrfs_file_operations = { | 2726 | const struct file_operations btrfs_file_operations = { |
| 2742 | .llseek = btrfs_file_llseek, | 2727 | .llseek = btrfs_file_llseek, |
| 2743 | .read = do_sync_read, | 2728 | .read = new_sync_read, |
| 2744 | .write = do_sync_write, | 2729 | .write = new_sync_write, |
| 2745 | .aio_read = generic_file_aio_read, | 2730 | .read_iter = generic_file_read_iter, |
| 2746 | .splice_read = generic_file_splice_read, | 2731 | .splice_read = generic_file_splice_read, |
| 2747 | .aio_write = btrfs_file_aio_write, | 2732 | .write_iter = btrfs_file_write_iter, |
| 2748 | .mmap = btrfs_file_mmap, | 2733 | .mmap = btrfs_file_mmap, |
| 2749 | .open = generic_file_open, | 2734 | .open = generic_file_open, |
| 2750 | .release = btrfs_release_file, | 2735 | .release = btrfs_release_file, |
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 372b05ff1943..2b0a627cb5f9 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c | |||
| @@ -274,18 +274,32 @@ struct io_ctl { | |||
| 274 | }; | 274 | }; |
| 275 | 275 | ||
| 276 | static int io_ctl_init(struct io_ctl *io_ctl, struct inode *inode, | 276 | static int io_ctl_init(struct io_ctl *io_ctl, struct inode *inode, |
| 277 | struct btrfs_root *root) | 277 | struct btrfs_root *root, int write) |
| 278 | { | 278 | { |
| 279 | int num_pages; | ||
| 280 | int check_crcs = 0; | ||
| 281 | |||
| 282 | num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> | ||
| 283 | PAGE_CACHE_SHIFT; | ||
| 284 | |||
| 285 | if (btrfs_ino(inode) != BTRFS_FREE_INO_OBJECTID) | ||
| 286 | check_crcs = 1; | ||
| 287 | |||
| 288 | /* Make sure we can fit our crcs into the first page */ | ||
| 289 | if (write && check_crcs && | ||
| 290 | (num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE) | ||
| 291 | return -ENOSPC; | ||
| 292 | |||
| 279 | memset(io_ctl, 0, sizeof(struct io_ctl)); | 293 | memset(io_ctl, 0, sizeof(struct io_ctl)); |
| 280 | io_ctl->num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> | 294 | |
| 281 | PAGE_CACHE_SHIFT; | 295 | io_ctl->pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS); |
| 282 | io_ctl->pages = kzalloc(sizeof(struct page *) * io_ctl->num_pages, | ||
| 283 | GFP_NOFS); | ||
| 284 | if (!io_ctl->pages) | 296 | if (!io_ctl->pages) |
| 285 | return -ENOMEM; | 297 | return -ENOMEM; |
| 298 | |||
| 299 | io_ctl->num_pages = num_pages; | ||
| 286 | io_ctl->root = root; | 300 | io_ctl->root = root; |
| 287 | if (btrfs_ino(inode) != BTRFS_FREE_INO_OBJECTID) | 301 | io_ctl->check_crcs = check_crcs; |
| 288 | io_ctl->check_crcs = 1; | 302 | |
| 289 | return 0; | 303 | return 0; |
| 290 | } | 304 | } |
| 291 | 305 | ||
| @@ -666,6 +680,13 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, | |||
| 666 | generation = btrfs_free_space_generation(leaf, header); | 680 | generation = btrfs_free_space_generation(leaf, header); |
| 667 | btrfs_release_path(path); | 681 | btrfs_release_path(path); |
| 668 | 682 | ||
| 683 | if (!BTRFS_I(inode)->generation) { | ||
| 684 | btrfs_info(root->fs_info, | ||
| 685 | "The free space cache file (%llu) is invalid. skip it\n", | ||
| 686 | offset); | ||
| 687 | return 0; | ||
| 688 | } | ||
| 689 | |||
| 669 | if (BTRFS_I(inode)->generation != generation) { | 690 | if (BTRFS_I(inode)->generation != generation) { |
| 670 | btrfs_err(root->fs_info, | 691 | btrfs_err(root->fs_info, |
| 671 | "free space inode generation (%llu) " | 692 | "free space inode generation (%llu) " |
| @@ -677,7 +698,7 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, | |||
| 677 | if (!num_entries) | 698 | if (!num_entries) |
| 678 | return 0; | 699 | return 0; |
| 679 | 700 | ||
| 680 | ret = io_ctl_init(&io_ctl, inode, root); | 701 | ret = io_ctl_init(&io_ctl, inode, root, 0); |
| 681 | if (ret) | 702 | if (ret) |
| 682 | return ret; | 703 | return ret; |
| 683 | 704 | ||
| @@ -957,19 +978,18 @@ fail: | |||
| 957 | } | 978 | } |
| 958 | 979 | ||
| 959 | static noinline_for_stack int | 980 | static noinline_for_stack int |
| 960 | add_ioctl_entries(struct btrfs_root *root, | 981 | write_pinned_extent_entries(struct btrfs_root *root, |
| 961 | struct inode *inode, | 982 | struct btrfs_block_group_cache *block_group, |
| 962 | struct btrfs_block_group_cache *block_group, | 983 | struct io_ctl *io_ctl, |
| 963 | struct io_ctl *io_ctl, | 984 | int *entries) |
| 964 | struct extent_state **cached_state, | ||
| 965 | struct list_head *bitmap_list, | ||
| 966 | int *entries) | ||
| 967 | { | 985 | { |
| 968 | u64 start, extent_start, extent_end, len; | 986 | u64 start, extent_start, extent_end, len; |
| 969 | struct list_head *pos, *n; | ||
| 970 | struct extent_io_tree *unpin = NULL; | 987 | struct extent_io_tree *unpin = NULL; |
| 971 | int ret; | 988 | int ret; |
| 972 | 989 | ||
| 990 | if (!block_group) | ||
| 991 | return 0; | ||
| 992 | |||
| 973 | /* | 993 | /* |
| 974 | * We want to add any pinned extents to our free space cache | 994 | * We want to add any pinned extents to our free space cache |
| 975 | * so we don't leak the space | 995 | * so we don't leak the space |
| @@ -979,23 +999,19 @@ add_ioctl_entries(struct btrfs_root *root, | |||
| 979 | */ | 999 | */ |
| 980 | unpin = root->fs_info->pinned_extents; | 1000 | unpin = root->fs_info->pinned_extents; |
| 981 | 1001 | ||
| 982 | if (block_group) | 1002 | start = block_group->key.objectid; |
| 983 | start = block_group->key.objectid; | ||
| 984 | 1003 | ||
| 985 | while (block_group && (start < block_group->key.objectid + | 1004 | while (start < block_group->key.objectid + block_group->key.offset) { |
| 986 | block_group->key.offset)) { | ||
| 987 | ret = find_first_extent_bit(unpin, start, | 1005 | ret = find_first_extent_bit(unpin, start, |
| 988 | &extent_start, &extent_end, | 1006 | &extent_start, &extent_end, |
| 989 | EXTENT_DIRTY, NULL); | 1007 | EXTENT_DIRTY, NULL); |
| 990 | if (ret) { | 1008 | if (ret) |
| 991 | ret = 0; | 1009 | return 0; |
| 992 | break; | ||
| 993 | } | ||
| 994 | 1010 | ||
| 995 | /* This pinned extent is out of our range */ | 1011 | /* This pinned extent is out of our range */ |
| 996 | if (extent_start >= block_group->key.objectid + | 1012 | if (extent_start >= block_group->key.objectid + |
| 997 | block_group->key.offset) | 1013 | block_group->key.offset) |
| 998 | break; | 1014 | return 0; |
| 999 | 1015 | ||
| 1000 | extent_start = max(extent_start, start); | 1016 | extent_start = max(extent_start, start); |
| 1001 | extent_end = min(block_group->key.objectid + | 1017 | extent_end = min(block_group->key.objectid + |
| @@ -1005,11 +1021,20 @@ add_ioctl_entries(struct btrfs_root *root, | |||
| 1005 | *entries += 1; | 1021 | *entries += 1; |
| 1006 | ret = io_ctl_add_entry(io_ctl, extent_start, len, NULL); | 1022 | ret = io_ctl_add_entry(io_ctl, extent_start, len, NULL); |
| 1007 | if (ret) | 1023 | if (ret) |
| 1008 | goto out_nospc; | 1024 | return -ENOSPC; |
| 1009 | 1025 | ||
| 1010 | start = extent_end; | 1026 | start = extent_end; |
| 1011 | } | 1027 | } |
| 1012 | 1028 | ||
| 1029 | return 0; | ||
| 1030 | } | ||
| 1031 | |||
| 1032 | static noinline_for_stack int | ||
| 1033 | write_bitmap_entries(struct io_ctl *io_ctl, struct list_head *bitmap_list) | ||
| 1034 | { | ||
| 1035 | struct list_head *pos, *n; | ||
| 1036 | int ret; | ||
| 1037 | |||
| 1013 | /* Write out the bitmaps */ | 1038 | /* Write out the bitmaps */ |
| 1014 | list_for_each_safe(pos, n, bitmap_list) { | 1039 | list_for_each_safe(pos, n, bitmap_list) { |
| 1015 | struct btrfs_free_space *entry = | 1040 | struct btrfs_free_space *entry = |
| @@ -1017,36 +1042,24 @@ add_ioctl_entries(struct btrfs_root *root, | |||
| 1017 | 1042 | ||
| 1018 | ret = io_ctl_add_bitmap(io_ctl, entry->bitmap); | 1043 | ret = io_ctl_add_bitmap(io_ctl, entry->bitmap); |
| 1019 | if (ret) | 1044 | if (ret) |
| 1020 | goto out_nospc; | 1045 | return -ENOSPC; |
| 1021 | list_del_init(&entry->list); | 1046 | list_del_init(&entry->list); |
| 1022 | } | 1047 | } |
| 1023 | 1048 | ||
| 1024 | /* Zero out the rest of the pages just to make sure */ | 1049 | return 0; |
| 1025 | io_ctl_zero_remaining_pages(io_ctl); | 1050 | } |
| 1026 | |||
| 1027 | ret = btrfs_dirty_pages(root, inode, io_ctl->pages, io_ctl->num_pages, | ||
| 1028 | 0, i_size_read(inode), cached_state); | ||
| 1029 | io_ctl_drop_pages(io_ctl); | ||
| 1030 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, | ||
| 1031 | i_size_read(inode) - 1, cached_state, GFP_NOFS); | ||
| 1032 | 1051 | ||
| 1033 | if (ret) | 1052 | static int flush_dirty_cache(struct inode *inode) |
| 1034 | goto fail; | 1053 | { |
| 1054 | int ret; | ||
| 1035 | 1055 | ||
| 1036 | ret = btrfs_wait_ordered_range(inode, 0, (u64)-1); | 1056 | ret = btrfs_wait_ordered_range(inode, 0, (u64)-1); |
| 1037 | if (ret) { | 1057 | if (ret) |
| 1038 | clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1, | 1058 | clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1, |
| 1039 | EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL, | 1059 | EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL, |
| 1040 | GFP_NOFS); | 1060 | GFP_NOFS); |
| 1041 | goto fail; | ||
| 1042 | } | ||
| 1043 | return 0; | ||
| 1044 | 1061 | ||
| 1045 | fail: | 1062 | return ret; |
| 1046 | return -1; | ||
| 1047 | |||
| 1048 | out_nospc: | ||
| 1049 | return -ENOSPC; | ||
| 1050 | } | 1063 | } |
| 1051 | 1064 | ||
| 1052 | static void noinline_for_stack | 1065 | static void noinline_for_stack |
| @@ -1056,6 +1069,7 @@ cleanup_write_cache_enospc(struct inode *inode, | |||
| 1056 | struct list_head *bitmap_list) | 1069 | struct list_head *bitmap_list) |
| 1057 | { | 1070 | { |
| 1058 | struct list_head *pos, *n; | 1071 | struct list_head *pos, *n; |
| 1072 | |||
| 1059 | list_for_each_safe(pos, n, bitmap_list) { | 1073 | list_for_each_safe(pos, n, bitmap_list) { |
| 1060 | struct btrfs_free_space *entry = | 1074 | struct btrfs_free_space *entry = |
| 1061 | list_entry(pos, struct btrfs_free_space, list); | 1075 | list_entry(pos, struct btrfs_free_space, list); |
| @@ -1088,64 +1102,104 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
| 1088 | { | 1102 | { |
| 1089 | struct extent_state *cached_state = NULL; | 1103 | struct extent_state *cached_state = NULL; |
| 1090 | struct io_ctl io_ctl; | 1104 | struct io_ctl io_ctl; |
| 1091 | struct list_head bitmap_list; | 1105 | LIST_HEAD(bitmap_list); |
| 1092 | int entries = 0; | 1106 | int entries = 0; |
| 1093 | int bitmaps = 0; | 1107 | int bitmaps = 0; |
| 1094 | int ret; | 1108 | int ret; |
| 1095 | int err = -1; | ||
| 1096 | |||
| 1097 | INIT_LIST_HEAD(&bitmap_list); | ||
| 1098 | 1109 | ||
| 1099 | if (!i_size_read(inode)) | 1110 | if (!i_size_read(inode)) |
| 1100 | return -1; | 1111 | return -1; |
| 1101 | 1112 | ||
| 1102 | ret = io_ctl_init(&io_ctl, inode, root); | 1113 | ret = io_ctl_init(&io_ctl, inode, root, 1); |
| 1103 | if (ret) | 1114 | if (ret) |
| 1104 | return -1; | 1115 | return -1; |
| 1105 | 1116 | ||
| 1117 | if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) { | ||
| 1118 | down_write(&block_group->data_rwsem); | ||
| 1119 | spin_lock(&block_group->lock); | ||
| 1120 | if (block_group->delalloc_bytes) { | ||
| 1121 | block_group->disk_cache_state = BTRFS_DC_WRITTEN; | ||
| 1122 | spin_unlock(&block_group->lock); | ||
| 1123 | up_write(&block_group->data_rwsem); | ||
| 1124 | BTRFS_I(inode)->generation = 0; | ||
| 1125 | ret = 0; | ||
| 1126 | goto out; | ||
| 1127 | } | ||
| 1128 | spin_unlock(&block_group->lock); | ||
| 1129 | } | ||
| 1130 | |||
| 1106 | /* Lock all pages first so we can lock the extent safely. */ | 1131 | /* Lock all pages first so we can lock the extent safely. */ |
| 1107 | io_ctl_prepare_pages(&io_ctl, inode, 0); | 1132 | io_ctl_prepare_pages(&io_ctl, inode, 0); |
| 1108 | 1133 | ||
| 1109 | lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, | 1134 | lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, |
| 1110 | 0, &cached_state); | 1135 | 0, &cached_state); |
| 1111 | 1136 | ||
| 1112 | |||
| 1113 | /* Make sure we can fit our crcs into the first page */ | ||
| 1114 | if (io_ctl.check_crcs && | ||
| 1115 | (io_ctl.num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE) | ||
| 1116 | goto out_nospc; | ||
| 1117 | |||
| 1118 | io_ctl_set_generation(&io_ctl, trans->transid); | 1137 | io_ctl_set_generation(&io_ctl, trans->transid); |
| 1119 | 1138 | ||
| 1139 | /* Write out the extent entries in the free space cache */ | ||
| 1120 | ret = write_cache_extent_entries(&io_ctl, ctl, | 1140 | ret = write_cache_extent_entries(&io_ctl, ctl, |
| 1121 | block_group, &entries, &bitmaps, | 1141 | block_group, &entries, &bitmaps, |
| 1122 | &bitmap_list); | 1142 | &bitmap_list); |
| 1123 | if (ret) | 1143 | if (ret) |
| 1124 | goto out_nospc; | 1144 | goto out_nospc; |
| 1125 | 1145 | ||
| 1126 | ret = add_ioctl_entries(root, inode, block_group, &io_ctl, | 1146 | /* |
| 1127 | &cached_state, &bitmap_list, &entries); | 1147 | * Some spaces that are freed in the current transaction are pinned, |
| 1148 | * they will be added into free space cache after the transaction is | ||
| 1149 | * committed, we shouldn't lose them. | ||
| 1150 | */ | ||
| 1151 | ret = write_pinned_extent_entries(root, block_group, &io_ctl, &entries); | ||
| 1152 | if (ret) | ||
| 1153 | goto out_nospc; | ||
| 1128 | 1154 | ||
| 1129 | if (ret == -ENOSPC) | 1155 | /* At last, we write out all the bitmaps. */ |
| 1156 | ret = write_bitmap_entries(&io_ctl, &bitmap_list); | ||
| 1157 | if (ret) | ||
| 1130 | goto out_nospc; | 1158 | goto out_nospc; |
| 1131 | else if (ret) | 1159 | |
| 1160 | /* Zero out the rest of the pages just to make sure */ | ||
| 1161 | io_ctl_zero_remaining_pages(&io_ctl); | ||
| 1162 | |||
| 1163 | /* Everything is written out, now we dirty the pages in the file. */ | ||
| 1164 | ret = btrfs_dirty_pages(root, inode, io_ctl.pages, io_ctl.num_pages, | ||
| 1165 | 0, i_size_read(inode), &cached_state); | ||
| 1166 | if (ret) | ||
| 1167 | goto out_nospc; | ||
| 1168 | |||
| 1169 | if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) | ||
| 1170 | up_write(&block_group->data_rwsem); | ||
| 1171 | /* | ||
| 1172 | * Release the pages and unlock the extent, we will flush | ||
| 1173 | * them out later | ||
| 1174 | */ | ||
| 1175 | io_ctl_drop_pages(&io_ctl); | ||
| 1176 | |||
| 1177 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, | ||
| 1178 | i_size_read(inode) - 1, &cached_state, GFP_NOFS); | ||
| 1179 | |||
| 1180 | /* Flush the dirty pages in the cache file. */ | ||
| 1181 | ret = flush_dirty_cache(inode); | ||
| 1182 | if (ret) | ||
| 1132 | goto out; | 1183 | goto out; |
| 1133 | 1184 | ||
| 1134 | err = update_cache_item(trans, root, inode, path, offset, | 1185 | /* Update the cache item to tell everyone this cache file is valid. */ |
| 1186 | ret = update_cache_item(trans, root, inode, path, offset, | ||
| 1135 | entries, bitmaps); | 1187 | entries, bitmaps); |
| 1136 | |||
| 1137 | out: | 1188 | out: |
| 1138 | io_ctl_free(&io_ctl); | 1189 | io_ctl_free(&io_ctl); |
| 1139 | if (err) { | 1190 | if (ret) { |
| 1140 | invalidate_inode_pages2(inode->i_mapping); | 1191 | invalidate_inode_pages2(inode->i_mapping); |
| 1141 | BTRFS_I(inode)->generation = 0; | 1192 | BTRFS_I(inode)->generation = 0; |
| 1142 | } | 1193 | } |
| 1143 | btrfs_update_inode(trans, root, inode); | 1194 | btrfs_update_inode(trans, root, inode); |
| 1144 | return err; | 1195 | return ret; |
| 1145 | 1196 | ||
| 1146 | out_nospc: | 1197 | out_nospc: |
| 1147 | |||
| 1148 | cleanup_write_cache_enospc(inode, &io_ctl, &cached_state, &bitmap_list); | 1198 | cleanup_write_cache_enospc(inode, &io_ctl, &cached_state, &bitmap_list); |
| 1199 | |||
| 1200 | if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) | ||
| 1201 | up_write(&block_group->data_rwsem); | ||
| 1202 | |||
| 1149 | goto out; | 1203 | goto out; |
| 1150 | } | 1204 | } |
| 1151 | 1205 | ||
| @@ -1165,6 +1219,12 @@ int btrfs_write_out_cache(struct btrfs_root *root, | |||
| 1165 | spin_unlock(&block_group->lock); | 1219 | spin_unlock(&block_group->lock); |
| 1166 | return 0; | 1220 | return 0; |
| 1167 | } | 1221 | } |
| 1222 | |||
| 1223 | if (block_group->delalloc_bytes) { | ||
| 1224 | block_group->disk_cache_state = BTRFS_DC_WRITTEN; | ||
| 1225 | spin_unlock(&block_group->lock); | ||
| 1226 | return 0; | ||
| 1227 | } | ||
| 1168 | spin_unlock(&block_group->lock); | 1228 | spin_unlock(&block_group->lock); |
| 1169 | 1229 | ||
| 1170 | inode = lookup_free_space_inode(root, block_group, path); | 1230 | inode = lookup_free_space_inode(root, block_group, path); |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 7fa5f7fd7bc7..3668048e16f8 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
| @@ -693,7 +693,7 @@ retry: | |||
| 693 | ret = btrfs_reserve_extent(root, | 693 | ret = btrfs_reserve_extent(root, |
| 694 | async_extent->compressed_size, | 694 | async_extent->compressed_size, |
| 695 | async_extent->compressed_size, | 695 | async_extent->compressed_size, |
| 696 | 0, alloc_hint, &ins, 1); | 696 | 0, alloc_hint, &ins, 1, 1); |
| 697 | if (ret) { | 697 | if (ret) { |
| 698 | int i; | 698 | int i; |
| 699 | 699 | ||
| @@ -794,7 +794,7 @@ retry: | |||
| 794 | out: | 794 | out: |
| 795 | return ret; | 795 | return ret; |
| 796 | out_free_reserve: | 796 | out_free_reserve: |
| 797 | btrfs_free_reserved_extent(root, ins.objectid, ins.offset); | 797 | btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1); |
| 798 | out_free: | 798 | out_free: |
| 799 | extent_clear_unlock_delalloc(inode, async_extent->start, | 799 | extent_clear_unlock_delalloc(inode, async_extent->start, |
| 800 | async_extent->start + | 800 | async_extent->start + |
| @@ -917,7 +917,7 @@ static noinline int cow_file_range(struct inode *inode, | |||
| 917 | cur_alloc_size = disk_num_bytes; | 917 | cur_alloc_size = disk_num_bytes; |
| 918 | ret = btrfs_reserve_extent(root, cur_alloc_size, | 918 | ret = btrfs_reserve_extent(root, cur_alloc_size, |
| 919 | root->sectorsize, 0, alloc_hint, | 919 | root->sectorsize, 0, alloc_hint, |
| 920 | &ins, 1); | 920 | &ins, 1, 1); |
| 921 | if (ret < 0) | 921 | if (ret < 0) |
| 922 | goto out_unlock; | 922 | goto out_unlock; |
| 923 | 923 | ||
| @@ -995,7 +995,7 @@ out: | |||
| 995 | return ret; | 995 | return ret; |
| 996 | 996 | ||
| 997 | out_reserve: | 997 | out_reserve: |
| 998 | btrfs_free_reserved_extent(root, ins.objectid, ins.offset); | 998 | btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1); |
| 999 | out_unlock: | 999 | out_unlock: |
| 1000 | extent_clear_unlock_delalloc(inode, start, end, locked_page, | 1000 | extent_clear_unlock_delalloc(inode, start, end, locked_page, |
| 1001 | EXTENT_LOCKED | EXTENT_DO_ACCOUNTING | | 1001 | EXTENT_LOCKED | EXTENT_DO_ACCOUNTING | |
| @@ -2599,6 +2599,21 @@ out_kfree: | |||
| 2599 | return NULL; | 2599 | return NULL; |
| 2600 | } | 2600 | } |
| 2601 | 2601 | ||
| 2602 | static void btrfs_release_delalloc_bytes(struct btrfs_root *root, | ||
| 2603 | u64 start, u64 len) | ||
| 2604 | { | ||
| 2605 | struct btrfs_block_group_cache *cache; | ||
| 2606 | |||
| 2607 | cache = btrfs_lookup_block_group(root->fs_info, start); | ||
| 2608 | ASSERT(cache); | ||
| 2609 | |||
| 2610 | spin_lock(&cache->lock); | ||
| 2611 | cache->delalloc_bytes -= len; | ||
| 2612 | spin_unlock(&cache->lock); | ||
| 2613 | |||
| 2614 | btrfs_put_block_group(cache); | ||
| 2615 | } | ||
| 2616 | |||
| 2602 | /* as ordered data IO finishes, this gets called so we can finish | 2617 | /* as ordered data IO finishes, this gets called so we can finish |
| 2603 | * an ordered extent if the range of bytes in the file it covers are | 2618 | * an ordered extent if the range of bytes in the file it covers are |
| 2604 | * fully written. | 2619 | * fully written. |
| @@ -2698,6 +2713,10 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) | |||
| 2698 | logical_len, logical_len, | 2713 | logical_len, logical_len, |
| 2699 | compress_type, 0, 0, | 2714 | compress_type, 0, 0, |
| 2700 | BTRFS_FILE_EXTENT_REG); | 2715 | BTRFS_FILE_EXTENT_REG); |
| 2716 | if (!ret) | ||
| 2717 | btrfs_release_delalloc_bytes(root, | ||
| 2718 | ordered_extent->start, | ||
| 2719 | ordered_extent->disk_len); | ||
| 2701 | } | 2720 | } |
| 2702 | unpin_extent_cache(&BTRFS_I(inode)->extent_tree, | 2721 | unpin_extent_cache(&BTRFS_I(inode)->extent_tree, |
| 2703 | ordered_extent->file_offset, ordered_extent->len, | 2722 | ordered_extent->file_offset, ordered_extent->len, |
| @@ -2750,7 +2769,7 @@ out: | |||
| 2750 | !test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) && | 2769 | !test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) && |
| 2751 | !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) | 2770 | !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) |
| 2752 | btrfs_free_reserved_extent(root, ordered_extent->start, | 2771 | btrfs_free_reserved_extent(root, ordered_extent->start, |
| 2753 | ordered_extent->disk_len); | 2772 | ordered_extent->disk_len, 1); |
| 2754 | } | 2773 | } |
| 2755 | 2774 | ||
| 2756 | 2775 | ||
| @@ -6535,21 +6554,21 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode, | |||
| 6535 | 6554 | ||
| 6536 | alloc_hint = get_extent_allocation_hint(inode, start, len); | 6555 | alloc_hint = get_extent_allocation_hint(inode, start, len); |
| 6537 | ret = btrfs_reserve_extent(root, len, root->sectorsize, 0, | 6556 | ret = btrfs_reserve_extent(root, len, root->sectorsize, 0, |
| 6538 | alloc_hint, &ins, 1); | 6557 | alloc_hint, &ins, 1, 1); |
| 6539 | if (ret) | 6558 | if (ret) |
| 6540 | return ERR_PTR(ret); | 6559 | return ERR_PTR(ret); |
| 6541 | 6560 | ||
| 6542 | em = create_pinned_em(inode, start, ins.offset, start, ins.objectid, | 6561 | em = create_pinned_em(inode, start, ins.offset, start, ins.objectid, |
| 6543 | ins.offset, ins.offset, ins.offset, 0); | 6562 | ins.offset, ins.offset, ins.offset, 0); |
| 6544 | if (IS_ERR(em)) { | 6563 | if (IS_ERR(em)) { |
| 6545 | btrfs_free_reserved_extent(root, ins.objectid, ins.offset); | 6564 | btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1); |
| 6546 | return em; | 6565 | return em; |
| 6547 | } | 6566 | } |
| 6548 | 6567 | ||
| 6549 | ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid, | 6568 | ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid, |
| 6550 | ins.offset, ins.offset, 0); | 6569 | ins.offset, ins.offset, 0); |
| 6551 | if (ret) { | 6570 | if (ret) { |
| 6552 | btrfs_free_reserved_extent(root, ins.objectid, ins.offset); | 6571 | btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1); |
| 6553 | free_extent_map(em); | 6572 | free_extent_map(em); |
| 6554 | return ERR_PTR(ret); | 6573 | return ERR_PTR(ret); |
| 6555 | } | 6574 | } |
| @@ -7437,7 +7456,7 @@ free_ordered: | |||
| 7437 | if (!test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags) && | 7456 | if (!test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags) && |
| 7438 | !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) | 7457 | !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) |
| 7439 | btrfs_free_reserved_extent(root, ordered->start, | 7458 | btrfs_free_reserved_extent(root, ordered->start, |
| 7440 | ordered->disk_len); | 7459 | ordered->disk_len, 1); |
| 7441 | btrfs_put_ordered_extent(ordered); | 7460 | btrfs_put_ordered_extent(ordered); |
| 7442 | btrfs_put_ordered_extent(ordered); | 7461 | btrfs_put_ordered_extent(ordered); |
| 7443 | } | 7462 | } |
| @@ -7445,39 +7464,30 @@ free_ordered: | |||
| 7445 | } | 7464 | } |
| 7446 | 7465 | ||
| 7447 | static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *iocb, | 7466 | static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *iocb, |
| 7448 | const struct iovec *iov, loff_t offset, | 7467 | const struct iov_iter *iter, loff_t offset) |
| 7449 | unsigned long nr_segs) | ||
| 7450 | { | 7468 | { |
| 7451 | int seg; | 7469 | int seg; |
| 7452 | int i; | 7470 | int i; |
| 7453 | size_t size; | ||
| 7454 | unsigned long addr; | ||
| 7455 | unsigned blocksize_mask = root->sectorsize - 1; | 7471 | unsigned blocksize_mask = root->sectorsize - 1; |
| 7456 | ssize_t retval = -EINVAL; | 7472 | ssize_t retval = -EINVAL; |
| 7457 | loff_t end = offset; | ||
| 7458 | 7473 | ||
| 7459 | if (offset & blocksize_mask) | 7474 | if (offset & blocksize_mask) |
| 7460 | goto out; | 7475 | goto out; |
| 7461 | 7476 | ||
| 7462 | /* Check the memory alignment. Blocks cannot straddle pages */ | 7477 | if (iov_iter_alignment(iter) & blocksize_mask) |
| 7463 | for (seg = 0; seg < nr_segs; seg++) { | 7478 | goto out; |
| 7464 | addr = (unsigned long)iov[seg].iov_base; | ||
| 7465 | size = iov[seg].iov_len; | ||
| 7466 | end += size; | ||
| 7467 | if ((addr & blocksize_mask) || (size & blocksize_mask)) | ||
| 7468 | goto out; | ||
| 7469 | |||
| 7470 | /* If this is a write we don't need to check anymore */ | ||
| 7471 | if (rw & WRITE) | ||
| 7472 | continue; | ||
| 7473 | 7479 | ||
| 7474 | /* | 7480 | /* If this is a write we don't need to check anymore */ |
| 7475 | * Check to make sure we don't have duplicate iov_base's in this | 7481 | if (rw & WRITE) |
| 7476 | * iovec, if so return EINVAL, otherwise we'll get csum errors | 7482 | return 0; |
| 7477 | * when reading back. | 7483 | /* |
| 7478 | */ | 7484 | * Check to make sure we don't have duplicate iov_base's in this |
| 7479 | for (i = seg + 1; i < nr_segs; i++) { | 7485 | * iovec, if so return EINVAL, otherwise we'll get csum errors |
| 7480 | if (iov[seg].iov_base == iov[i].iov_base) | 7486 | * when reading back. |
| 7487 | */ | ||
| 7488 | for (seg = 0; seg < iter->nr_segs; seg++) { | ||
| 7489 | for (i = seg + 1; i < iter->nr_segs; i++) { | ||
| 7490 | if (iter->iov[seg].iov_base == iter->iov[i].iov_base) | ||
| 7481 | goto out; | 7491 | goto out; |
| 7482 | } | 7492 | } |
| 7483 | } | 7493 | } |
| @@ -7487,8 +7497,7 @@ out: | |||
| 7487 | } | 7497 | } |
| 7488 | 7498 | ||
| 7489 | static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, | 7499 | static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, |
| 7490 | const struct iovec *iov, loff_t offset, | 7500 | struct iov_iter *iter, loff_t offset) |
| 7491 | unsigned long nr_segs) | ||
| 7492 | { | 7501 | { |
| 7493 | struct file *file = iocb->ki_filp; | 7502 | struct file *file = iocb->ki_filp; |
| 7494 | struct inode *inode = file->f_mapping->host; | 7503 | struct inode *inode = file->f_mapping->host; |
| @@ -7498,8 +7507,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, | |||
| 7498 | bool relock = false; | 7507 | bool relock = false; |
| 7499 | ssize_t ret; | 7508 | ssize_t ret; |
| 7500 | 7509 | ||
| 7501 | if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov, | 7510 | if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iter, offset)) |
| 7502 | offset, nr_segs)) | ||
| 7503 | return 0; | 7511 | return 0; |
| 7504 | 7512 | ||
| 7505 | atomic_inc(&inode->i_dio_count); | 7513 | atomic_inc(&inode->i_dio_count); |
| @@ -7511,7 +7519,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, | |||
| 7511 | * we need to flush the dirty pages again to make absolutely sure | 7519 | * we need to flush the dirty pages again to make absolutely sure |
| 7512 | * that any outstanding dirty pages are on disk. | 7520 | * that any outstanding dirty pages are on disk. |
| 7513 | */ | 7521 | */ |
| 7514 | count = iov_length(iov, nr_segs); | 7522 | count = iov_iter_count(iter); |
| 7515 | if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, | 7523 | if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, |
| 7516 | &BTRFS_I(inode)->runtime_flags)) | 7524 | &BTRFS_I(inode)->runtime_flags)) |
| 7517 | filemap_fdatawrite_range(inode->i_mapping, offset, count); | 7525 | filemap_fdatawrite_range(inode->i_mapping, offset, count); |
| @@ -7538,7 +7546,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, | |||
| 7538 | 7546 | ||
| 7539 | ret = __blockdev_direct_IO(rw, iocb, inode, | 7547 | ret = __blockdev_direct_IO(rw, iocb, inode, |
| 7540 | BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev, | 7548 | BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev, |
| 7541 | iov, offset, nr_segs, btrfs_get_blocks_direct, NULL, | 7549 | iter, offset, btrfs_get_blocks_direct, NULL, |
| 7542 | btrfs_submit_direct, flags); | 7550 | btrfs_submit_direct, flags); |
| 7543 | if (rw & WRITE) { | 7551 | if (rw & WRITE) { |
| 7544 | if (ret < 0 && ret != -EIOCBQUEUED) | 7552 | if (ret < 0 && ret != -EIOCBQUEUED) |
| @@ -8819,7 +8827,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, | |||
| 8819 | cur_bytes = min(num_bytes, 256ULL * 1024 * 1024); | 8827 | cur_bytes = min(num_bytes, 256ULL * 1024 * 1024); |
| 8820 | cur_bytes = max(cur_bytes, min_size); | 8828 | cur_bytes = max(cur_bytes, min_size); |
| 8821 | ret = btrfs_reserve_extent(root, cur_bytes, min_size, 0, | 8829 | ret = btrfs_reserve_extent(root, cur_bytes, min_size, 0, |
| 8822 | *alloc_hint, &ins, 1); | 8830 | *alloc_hint, &ins, 1, 0); |
| 8823 | if (ret) { | 8831 | if (ret) { |
| 8824 | if (own_trans) | 8832 | if (own_trans) |
| 8825 | btrfs_end_transaction(trans, root); | 8833 | btrfs_end_transaction(trans, root); |
| @@ -8833,7 +8841,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, | |||
| 8833 | BTRFS_FILE_EXTENT_PREALLOC); | 8841 | BTRFS_FILE_EXTENT_PREALLOC); |
| 8834 | if (ret) { | 8842 | if (ret) { |
| 8835 | btrfs_free_reserved_extent(root, ins.objectid, | 8843 | btrfs_free_reserved_extent(root, ins.objectid, |
| 8836 | ins.offset); | 8844 | ins.offset, 0); |
| 8837 | btrfs_abort_transaction(trans, root, ret); | 8845 | btrfs_abort_transaction(trans, root, ret); |
| 8838 | if (own_trans) | 8846 | if (own_trans) |
| 8839 | btrfs_end_transaction(trans, root); | 8847 | btrfs_end_transaction(trans, root); |
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 82c18ba12e3f..47aceb494d1d 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
| @@ -136,19 +136,22 @@ static unsigned int btrfs_flags_to_ioctl(unsigned int flags) | |||
| 136 | void btrfs_update_iflags(struct inode *inode) | 136 | void btrfs_update_iflags(struct inode *inode) |
| 137 | { | 137 | { |
| 138 | struct btrfs_inode *ip = BTRFS_I(inode); | 138 | struct btrfs_inode *ip = BTRFS_I(inode); |
| 139 | 139 | unsigned int new_fl = 0; | |
| 140 | inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); | ||
| 141 | 140 | ||
| 142 | if (ip->flags & BTRFS_INODE_SYNC) | 141 | if (ip->flags & BTRFS_INODE_SYNC) |
| 143 | inode->i_flags |= S_SYNC; | 142 | new_fl |= S_SYNC; |
| 144 | if (ip->flags & BTRFS_INODE_IMMUTABLE) | 143 | if (ip->flags & BTRFS_INODE_IMMUTABLE) |
| 145 | inode->i_flags |= S_IMMUTABLE; | 144 | new_fl |= S_IMMUTABLE; |
| 146 | if (ip->flags & BTRFS_INODE_APPEND) | 145 | if (ip->flags & BTRFS_INODE_APPEND) |
| 147 | inode->i_flags |= S_APPEND; | 146 | new_fl |= S_APPEND; |
| 148 | if (ip->flags & BTRFS_INODE_NOATIME) | 147 | if (ip->flags & BTRFS_INODE_NOATIME) |
| 149 | inode->i_flags |= S_NOATIME; | 148 | new_fl |= S_NOATIME; |
| 150 | if (ip->flags & BTRFS_INODE_DIRSYNC) | 149 | if (ip->flags & BTRFS_INODE_DIRSYNC) |
| 151 | inode->i_flags |= S_DIRSYNC; | 150 | new_fl |= S_DIRSYNC; |
| 151 | |||
| 152 | set_mask_bits(&inode->i_flags, | ||
| 153 | S_SYNC | S_APPEND | S_IMMUTABLE | S_NOATIME | S_DIRSYNC, | ||
| 154 | new_fl); | ||
| 152 | } | 155 | } |
| 153 | 156 | ||
| 154 | /* | 157 | /* |
| @@ -1957,7 +1960,8 @@ static noinline int copy_to_sk(struct btrfs_root *root, | |||
| 1957 | struct btrfs_path *path, | 1960 | struct btrfs_path *path, |
| 1958 | struct btrfs_key *key, | 1961 | struct btrfs_key *key, |
| 1959 | struct btrfs_ioctl_search_key *sk, | 1962 | struct btrfs_ioctl_search_key *sk, |
| 1960 | char *buf, | 1963 | size_t *buf_size, |
| 1964 | char __user *ubuf, | ||
| 1961 | unsigned long *sk_offset, | 1965 | unsigned long *sk_offset, |
| 1962 | int *num_found) | 1966 | int *num_found) |
| 1963 | { | 1967 | { |
| @@ -1989,13 +1993,25 @@ static noinline int copy_to_sk(struct btrfs_root *root, | |||
| 1989 | if (!key_in_sk(key, sk)) | 1993 | if (!key_in_sk(key, sk)) |
| 1990 | continue; | 1994 | continue; |
| 1991 | 1995 | ||
| 1992 | if (sizeof(sh) + item_len > BTRFS_SEARCH_ARGS_BUFSIZE) | 1996 | if (sizeof(sh) + item_len > *buf_size) { |
| 1997 | if (*num_found) { | ||
| 1998 | ret = 1; | ||
| 1999 | goto out; | ||
| 2000 | } | ||
| 2001 | |||
| 2002 | /* | ||
| 2003 | * return one empty item back for v1, which does not | ||
| 2004 | * handle -EOVERFLOW | ||
| 2005 | */ | ||
| 2006 | |||
| 2007 | *buf_size = sizeof(sh) + item_len; | ||
| 1993 | item_len = 0; | 2008 | item_len = 0; |
| 2009 | ret = -EOVERFLOW; | ||
| 2010 | } | ||
| 1994 | 2011 | ||
| 1995 | if (sizeof(sh) + item_len + *sk_offset > | 2012 | if (sizeof(sh) + item_len + *sk_offset > *buf_size) { |
| 1996 | BTRFS_SEARCH_ARGS_BUFSIZE) { | ||
| 1997 | ret = 1; | 2013 | ret = 1; |
| 1998 | goto overflow; | 2014 | goto out; |
| 1999 | } | 2015 | } |
| 2000 | 2016 | ||
| 2001 | sh.objectid = key->objectid; | 2017 | sh.objectid = key->objectid; |
| @@ -2005,20 +2021,33 @@ static noinline int copy_to_sk(struct btrfs_root *root, | |||
| 2005 | sh.transid = found_transid; | 2021 | sh.transid = found_transid; |
| 2006 | 2022 | ||
| 2007 | /* copy search result header */ | 2023 | /* copy search result header */ |
| 2008 | memcpy(buf + *sk_offset, &sh, sizeof(sh)); | 2024 | if (copy_to_user(ubuf + *sk_offset, &sh, sizeof(sh))) { |
| 2025 | ret = -EFAULT; | ||
| 2026 | goto out; | ||
| 2027 | } | ||
| 2028 | |||
| 2009 | *sk_offset += sizeof(sh); | 2029 | *sk_offset += sizeof(sh); |
| 2010 | 2030 | ||
| 2011 | if (item_len) { | 2031 | if (item_len) { |
| 2012 | char *p = buf + *sk_offset; | 2032 | char __user *up = ubuf + *sk_offset; |
| 2013 | /* copy the item */ | 2033 | /* copy the item */ |
| 2014 | read_extent_buffer(leaf, p, | 2034 | if (read_extent_buffer_to_user(leaf, up, |
| 2015 | item_off, item_len); | 2035 | item_off, item_len)) { |
| 2036 | ret = -EFAULT; | ||
| 2037 | goto out; | ||
| 2038 | } | ||
| 2039 | |||
| 2016 | *sk_offset += item_len; | 2040 | *sk_offset += item_len; |
| 2017 | } | 2041 | } |
| 2018 | (*num_found)++; | 2042 | (*num_found)++; |
| 2019 | 2043 | ||
| 2020 | if (*num_found >= sk->nr_items) | 2044 | if (ret) /* -EOVERFLOW from above */ |
| 2021 | break; | 2045 | goto out; |
| 2046 | |||
| 2047 | if (*num_found >= sk->nr_items) { | ||
| 2048 | ret = 1; | ||
| 2049 | goto out; | ||
| 2050 | } | ||
| 2022 | } | 2051 | } |
| 2023 | advance_key: | 2052 | advance_key: |
| 2024 | ret = 0; | 2053 | ret = 0; |
| @@ -2033,22 +2062,37 @@ advance_key: | |||
| 2033 | key->objectid++; | 2062 | key->objectid++; |
| 2034 | } else | 2063 | } else |
| 2035 | ret = 1; | 2064 | ret = 1; |
| 2036 | overflow: | 2065 | out: |
| 2066 | /* | ||
| 2067 | * 0: all items from this leaf copied, continue with next | ||
| 2068 | * 1: * more items can be copied, but unused buffer is too small | ||
| 2069 | * * all items were found | ||
| 2070 | * Either way, it will stop the loop which iterates to the next | ||
| 2071 | * leaf | ||
| 2072 | * -EOVERFLOW: item was too large for buffer | ||
| 2073 | * -EFAULT: could not copy extent buffer back to userspace | ||
| 2074 | */ | ||
| 2037 | return ret; | 2075 | return ret; |
| 2038 | } | 2076 | } |
| 2039 | 2077 | ||
| 2040 | static noinline int search_ioctl(struct inode *inode, | 2078 | static noinline int search_ioctl(struct inode *inode, |
| 2041 | struct btrfs_ioctl_search_args *args) | 2079 | struct btrfs_ioctl_search_key *sk, |
| 2080 | size_t *buf_size, | ||
| 2081 | char __user *ubuf) | ||
| 2042 | { | 2082 | { |
| 2043 | struct btrfs_root *root; | 2083 | struct btrfs_root *root; |
| 2044 | struct btrfs_key key; | 2084 | struct btrfs_key key; |
| 2045 | struct btrfs_path *path; | 2085 | struct btrfs_path *path; |
| 2046 | struct btrfs_ioctl_search_key *sk = &args->key; | ||
| 2047 | struct btrfs_fs_info *info = BTRFS_I(inode)->root->fs_info; | 2086 | struct btrfs_fs_info *info = BTRFS_I(inode)->root->fs_info; |
| 2048 | int ret; | 2087 | int ret; |
| 2049 | int num_found = 0; | 2088 | int num_found = 0; |
| 2050 | unsigned long sk_offset = 0; | 2089 | unsigned long sk_offset = 0; |
| 2051 | 2090 | ||
| 2091 | if (*buf_size < sizeof(struct btrfs_ioctl_search_header)) { | ||
| 2092 | *buf_size = sizeof(struct btrfs_ioctl_search_header); | ||
| 2093 | return -EOVERFLOW; | ||
| 2094 | } | ||
| 2095 | |||
| 2052 | path = btrfs_alloc_path(); | 2096 | path = btrfs_alloc_path(); |
| 2053 | if (!path) | 2097 | if (!path) |
| 2054 | return -ENOMEM; | 2098 | return -ENOMEM; |
| @@ -2082,14 +2126,15 @@ static noinline int search_ioctl(struct inode *inode, | |||
| 2082 | ret = 0; | 2126 | ret = 0; |
| 2083 | goto err; | 2127 | goto err; |
| 2084 | } | 2128 | } |
| 2085 | ret = copy_to_sk(root, path, &key, sk, args->buf, | 2129 | ret = copy_to_sk(root, path, &key, sk, buf_size, ubuf, |
| 2086 | &sk_offset, &num_found); | 2130 | &sk_offset, &num_found); |
| 2087 | btrfs_release_path(path); | 2131 | btrfs_release_path(path); |
| 2088 | if (ret || num_found >= sk->nr_items) | 2132 | if (ret) |
| 2089 | break; | 2133 | break; |
| 2090 | 2134 | ||
| 2091 | } | 2135 | } |
| 2092 | ret = 0; | 2136 | if (ret > 0) |
| 2137 | ret = 0; | ||
| 2093 | err: | 2138 | err: |
| 2094 | sk->nr_items = num_found; | 2139 | sk->nr_items = num_found; |
| 2095 | btrfs_free_path(path); | 2140 | btrfs_free_path(path); |
| @@ -2099,22 +2144,73 @@ err: | |||
| 2099 | static noinline int btrfs_ioctl_tree_search(struct file *file, | 2144 | static noinline int btrfs_ioctl_tree_search(struct file *file, |
| 2100 | void __user *argp) | 2145 | void __user *argp) |
| 2101 | { | 2146 | { |
| 2102 | struct btrfs_ioctl_search_args *args; | 2147 | struct btrfs_ioctl_search_args __user *uargs; |
| 2103 | struct inode *inode; | 2148 | struct btrfs_ioctl_search_key sk; |
| 2104 | int ret; | 2149 | struct inode *inode; |
| 2150 | int ret; | ||
| 2151 | size_t buf_size; | ||
| 2105 | 2152 | ||
| 2106 | if (!capable(CAP_SYS_ADMIN)) | 2153 | if (!capable(CAP_SYS_ADMIN)) |
| 2107 | return -EPERM; | 2154 | return -EPERM; |
| 2108 | 2155 | ||
| 2109 | args = memdup_user(argp, sizeof(*args)); | 2156 | uargs = (struct btrfs_ioctl_search_args __user *)argp; |
| 2110 | if (IS_ERR(args)) | 2157 | |
| 2111 | return PTR_ERR(args); | 2158 | if (copy_from_user(&sk, &uargs->key, sizeof(sk))) |
| 2159 | return -EFAULT; | ||
| 2160 | |||
| 2161 | buf_size = sizeof(uargs->buf); | ||
| 2112 | 2162 | ||
| 2113 | inode = file_inode(file); | 2163 | inode = file_inode(file); |
| 2114 | ret = search_ioctl(inode, args); | 2164 | ret = search_ioctl(inode, &sk, &buf_size, uargs->buf); |
| 2115 | if (ret == 0 && copy_to_user(argp, args, sizeof(*args))) | 2165 | |
| 2166 | /* | ||
| 2167 | * In the original implementation an overflow is handled by returning a | ||
| 2168 | * search header with a len of zero, so reset ret. | ||
| 2169 | */ | ||
| 2170 | if (ret == -EOVERFLOW) | ||
| 2171 | ret = 0; | ||
| 2172 | |||
| 2173 | if (ret == 0 && copy_to_user(&uargs->key, &sk, sizeof(sk))) | ||
| 2116 | ret = -EFAULT; | 2174 | ret = -EFAULT; |
| 2117 | kfree(args); | 2175 | return ret; |
| 2176 | } | ||
| 2177 | |||
| 2178 | static noinline int btrfs_ioctl_tree_search_v2(struct file *file, | ||
| 2179 | void __user *argp) | ||
| 2180 | { | ||
| 2181 | struct btrfs_ioctl_search_args_v2 __user *uarg; | ||
| 2182 | struct btrfs_ioctl_search_args_v2 args; | ||
| 2183 | struct inode *inode; | ||
| 2184 | int ret; | ||
| 2185 | size_t buf_size; | ||
| 2186 | const size_t buf_limit = 16 * 1024 * 1024; | ||
| 2187 | |||
| 2188 | if (!capable(CAP_SYS_ADMIN)) | ||
| 2189 | return -EPERM; | ||
| 2190 | |||
| 2191 | /* copy search header and buffer size */ | ||
| 2192 | uarg = (struct btrfs_ioctl_search_args_v2 __user *)argp; | ||
| 2193 | if (copy_from_user(&args, uarg, sizeof(args))) | ||
| 2194 | return -EFAULT; | ||
| 2195 | |||
| 2196 | buf_size = args.buf_size; | ||
| 2197 | |||
| 2198 | if (buf_size < sizeof(struct btrfs_ioctl_search_header)) | ||
| 2199 | return -EOVERFLOW; | ||
| 2200 | |||
| 2201 | /* limit result size to 16MB */ | ||
| 2202 | if (buf_size > buf_limit) | ||
| 2203 | buf_size = buf_limit; | ||
| 2204 | |||
| 2205 | inode = file_inode(file); | ||
| 2206 | ret = search_ioctl(inode, &args.key, &buf_size, | ||
| 2207 | (char *)(&uarg->buf[0])); | ||
| 2208 | if (ret == 0 && copy_to_user(&uarg->key, &args.key, sizeof(args.key))) | ||
| 2209 | ret = -EFAULT; | ||
| 2210 | else if (ret == -EOVERFLOW && | ||
| 2211 | copy_to_user(&uarg->buf_size, &buf_size, sizeof(buf_size))) | ||
| 2212 | ret = -EFAULT; | ||
| 2213 | |||
| 2118 | return ret; | 2214 | return ret; |
| 2119 | } | 2215 | } |
| 2120 | 2216 | ||
| @@ -3046,7 +3142,6 @@ out: | |||
| 3046 | static void clone_update_extent_map(struct inode *inode, | 3142 | static void clone_update_extent_map(struct inode *inode, |
| 3047 | const struct btrfs_trans_handle *trans, | 3143 | const struct btrfs_trans_handle *trans, |
| 3048 | const struct btrfs_path *path, | 3144 | const struct btrfs_path *path, |
| 3049 | struct btrfs_file_extent_item *fi, | ||
| 3050 | const u64 hole_offset, | 3145 | const u64 hole_offset, |
| 3051 | const u64 hole_len) | 3146 | const u64 hole_len) |
| 3052 | { | 3147 | { |
| @@ -3061,7 +3156,11 @@ static void clone_update_extent_map(struct inode *inode, | |||
| 3061 | return; | 3156 | return; |
| 3062 | } | 3157 | } |
| 3063 | 3158 | ||
| 3064 | if (fi) { | 3159 | if (path) { |
| 3160 | struct btrfs_file_extent_item *fi; | ||
| 3161 | |||
| 3162 | fi = btrfs_item_ptr(path->nodes[0], path->slots[0], | ||
| 3163 | struct btrfs_file_extent_item); | ||
| 3065 | btrfs_extent_item_to_extent_map(inode, path, fi, false, em); | 3164 | btrfs_extent_item_to_extent_map(inode, path, fi, false, em); |
| 3066 | em->generation = -1; | 3165 | em->generation = -1; |
| 3067 | if (btrfs_file_extent_type(path->nodes[0], fi) == | 3166 | if (btrfs_file_extent_type(path->nodes[0], fi) == |
| @@ -3415,18 +3514,15 @@ process_slot: | |||
| 3415 | btrfs_item_ptr_offset(leaf, slot), | 3514 | btrfs_item_ptr_offset(leaf, slot), |
| 3416 | size); | 3515 | size); |
| 3417 | inode_add_bytes(inode, datal); | 3516 | inode_add_bytes(inode, datal); |
| 3418 | extent = btrfs_item_ptr(leaf, slot, | ||
| 3419 | struct btrfs_file_extent_item); | ||
| 3420 | } | 3517 | } |
| 3421 | 3518 | ||
| 3422 | /* If we have an implicit hole (NO_HOLES feature). */ | 3519 | /* If we have an implicit hole (NO_HOLES feature). */ |
| 3423 | if (drop_start < new_key.offset) | 3520 | if (drop_start < new_key.offset) |
| 3424 | clone_update_extent_map(inode, trans, | 3521 | clone_update_extent_map(inode, trans, |
| 3425 | path, NULL, drop_start, | 3522 | NULL, drop_start, |
| 3426 | new_key.offset - drop_start); | 3523 | new_key.offset - drop_start); |
| 3427 | 3524 | ||
| 3428 | clone_update_extent_map(inode, trans, path, | 3525 | clone_update_extent_map(inode, trans, path, 0, 0); |
| 3429 | extent, 0, 0); | ||
| 3430 | 3526 | ||
| 3431 | btrfs_mark_buffer_dirty(leaf); | 3527 | btrfs_mark_buffer_dirty(leaf); |
| 3432 | btrfs_release_path(path); | 3528 | btrfs_release_path(path); |
| @@ -3469,12 +3565,10 @@ process_slot: | |||
| 3469 | btrfs_end_transaction(trans, root); | 3565 | btrfs_end_transaction(trans, root); |
| 3470 | goto out; | 3566 | goto out; |
| 3471 | } | 3567 | } |
| 3568 | clone_update_extent_map(inode, trans, NULL, last_dest_end, | ||
| 3569 | destoff + len - last_dest_end); | ||
| 3472 | ret = clone_finish_inode_update(trans, inode, destoff + len, | 3570 | ret = clone_finish_inode_update(trans, inode, destoff + len, |
| 3473 | destoff, olen); | 3571 | destoff, olen); |
| 3474 | if (ret) | ||
| 3475 | goto out; | ||
| 3476 | clone_update_extent_map(inode, trans, path, NULL, last_dest_end, | ||
| 3477 | destoff + len - last_dest_end); | ||
| 3478 | } | 3572 | } |
| 3479 | 3573 | ||
| 3480 | out: | 3574 | out: |
| @@ -5198,6 +5292,8 @@ long btrfs_ioctl(struct file *file, unsigned int | |||
| 5198 | return btrfs_ioctl_trans_end(file); | 5292 | return btrfs_ioctl_trans_end(file); |
| 5199 | case BTRFS_IOC_TREE_SEARCH: | 5293 | case BTRFS_IOC_TREE_SEARCH: |
| 5200 | return btrfs_ioctl_tree_search(file, argp); | 5294 | return btrfs_ioctl_tree_search(file, argp); |
| 5295 | case BTRFS_IOC_TREE_SEARCH_V2: | ||
| 5296 | return btrfs_ioctl_tree_search_v2(file, argp); | ||
| 5201 | case BTRFS_IOC_INO_LOOKUP: | 5297 | case BTRFS_IOC_INO_LOOKUP: |
| 5202 | return btrfs_ioctl_ino_lookup(file, argp); | 5298 | return btrfs_ioctl_ino_lookup(file, argp); |
| 5203 | case BTRFS_IOC_INO_PATHS: | 5299 | case BTRFS_IOC_INO_PATHS: |
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index 01277b8f2373..5665d2149249 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c | |||
| @@ -33,14 +33,14 @@ static void btrfs_assert_tree_read_locked(struct extent_buffer *eb); | |||
| 33 | */ | 33 | */ |
| 34 | void btrfs_set_lock_blocking_rw(struct extent_buffer *eb, int rw) | 34 | void btrfs_set_lock_blocking_rw(struct extent_buffer *eb, int rw) |
| 35 | { | 35 | { |
| 36 | if (eb->lock_nested) { | 36 | /* |
| 37 | read_lock(&eb->lock); | 37 | * no lock is required. The lock owner may change if |
| 38 | if (eb->lock_nested && current->pid == eb->lock_owner) { | 38 | * we have a read lock, but it won't change to or away |
| 39 | read_unlock(&eb->lock); | 39 | * from us. If we have the write lock, we are the owner |
| 40 | return; | 40 | * and it'll never change. |
| 41 | } | 41 | */ |
| 42 | read_unlock(&eb->lock); | 42 | if (eb->lock_nested && current->pid == eb->lock_owner) |
| 43 | } | 43 | return; |
| 44 | if (rw == BTRFS_WRITE_LOCK) { | 44 | if (rw == BTRFS_WRITE_LOCK) { |
| 45 | if (atomic_read(&eb->blocking_writers) == 0) { | 45 | if (atomic_read(&eb->blocking_writers) == 0) { |
| 46 | WARN_ON(atomic_read(&eb->spinning_writers) != 1); | 46 | WARN_ON(atomic_read(&eb->spinning_writers) != 1); |
| @@ -65,14 +65,15 @@ void btrfs_set_lock_blocking_rw(struct extent_buffer *eb, int rw) | |||
| 65 | */ | 65 | */ |
| 66 | void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw) | 66 | void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw) |
| 67 | { | 67 | { |
| 68 | if (eb->lock_nested) { | 68 | /* |
| 69 | read_lock(&eb->lock); | 69 | * no lock is required. The lock owner may change if |
| 70 | if (eb->lock_nested && current->pid == eb->lock_owner) { | 70 | * we have a read lock, but it won't change to or away |
| 71 | read_unlock(&eb->lock); | 71 | * from us. If we have the write lock, we are the owner |
| 72 | return; | 72 | * and it'll never change. |
| 73 | } | 73 | */ |
| 74 | read_unlock(&eb->lock); | 74 | if (eb->lock_nested && current->pid == eb->lock_owner) |
| 75 | } | 75 | return; |
| 76 | |||
| 76 | if (rw == BTRFS_WRITE_LOCK_BLOCKING) { | 77 | if (rw == BTRFS_WRITE_LOCK_BLOCKING) { |
| 77 | BUG_ON(atomic_read(&eb->blocking_writers) != 1); | 78 | BUG_ON(atomic_read(&eb->blocking_writers) != 1); |
| 78 | write_lock(&eb->lock); | 79 | write_lock(&eb->lock); |
| @@ -99,6 +100,9 @@ void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw) | |||
| 99 | void btrfs_tree_read_lock(struct extent_buffer *eb) | 100 | void btrfs_tree_read_lock(struct extent_buffer *eb) |
| 100 | { | 101 | { |
| 101 | again: | 102 | again: |
| 103 | BUG_ON(!atomic_read(&eb->blocking_writers) && | ||
| 104 | current->pid == eb->lock_owner); | ||
| 105 | |||
| 102 | read_lock(&eb->lock); | 106 | read_lock(&eb->lock); |
| 103 | if (atomic_read(&eb->blocking_writers) && | 107 | if (atomic_read(&eb->blocking_writers) && |
| 104 | current->pid == eb->lock_owner) { | 108 | current->pid == eb->lock_owner) { |
| @@ -132,7 +136,9 @@ int btrfs_try_tree_read_lock(struct extent_buffer *eb) | |||
| 132 | if (atomic_read(&eb->blocking_writers)) | 136 | if (atomic_read(&eb->blocking_writers)) |
| 133 | return 0; | 137 | return 0; |
| 134 | 138 | ||
| 135 | read_lock(&eb->lock); | 139 | if (!read_trylock(&eb->lock)) |
| 140 | return 0; | ||
| 141 | |||
| 136 | if (atomic_read(&eb->blocking_writers)) { | 142 | if (atomic_read(&eb->blocking_writers)) { |
| 137 | read_unlock(&eb->lock); | 143 | read_unlock(&eb->lock); |
| 138 | return 0; | 144 | return 0; |
| @@ -151,7 +157,10 @@ int btrfs_try_tree_write_lock(struct extent_buffer *eb) | |||
| 151 | if (atomic_read(&eb->blocking_writers) || | 157 | if (atomic_read(&eb->blocking_writers) || |
| 152 | atomic_read(&eb->blocking_readers)) | 158 | atomic_read(&eb->blocking_readers)) |
| 153 | return 0; | 159 | return 0; |
| 154 | write_lock(&eb->lock); | 160 | |
| 161 | if (!write_trylock(&eb->lock)) | ||
| 162 | return 0; | ||
| 163 | |||
| 155 | if (atomic_read(&eb->blocking_writers) || | 164 | if (atomic_read(&eb->blocking_writers) || |
| 156 | atomic_read(&eb->blocking_readers)) { | 165 | atomic_read(&eb->blocking_readers)) { |
| 157 | write_unlock(&eb->lock); | 166 | write_unlock(&eb->lock); |
| @@ -168,14 +177,15 @@ int btrfs_try_tree_write_lock(struct extent_buffer *eb) | |||
| 168 | */ | 177 | */ |
| 169 | void btrfs_tree_read_unlock(struct extent_buffer *eb) | 178 | void btrfs_tree_read_unlock(struct extent_buffer *eb) |
| 170 | { | 179 | { |
| 171 | if (eb->lock_nested) { | 180 | /* |
| 172 | read_lock(&eb->lock); | 181 | * if we're nested, we have the write lock. No new locking |
| 173 | if (eb->lock_nested && current->pid == eb->lock_owner) { | 182 | * is needed as long as we are the lock owner. |
| 174 | eb->lock_nested = 0; | 183 | * The write unlock will do a barrier for us, and the lock_nested |
| 175 | read_unlock(&eb->lock); | 184 | * field only matters to the lock owner. |
| 176 | return; | 185 | */ |
| 177 | } | 186 | if (eb->lock_nested && current->pid == eb->lock_owner) { |
| 178 | read_unlock(&eb->lock); | 187 | eb->lock_nested = 0; |
| 188 | return; | ||
| 179 | } | 189 | } |
| 180 | btrfs_assert_tree_read_locked(eb); | 190 | btrfs_assert_tree_read_locked(eb); |
| 181 | WARN_ON(atomic_read(&eb->spinning_readers) == 0); | 191 | WARN_ON(atomic_read(&eb->spinning_readers) == 0); |
| @@ -189,14 +199,15 @@ void btrfs_tree_read_unlock(struct extent_buffer *eb) | |||
| 189 | */ | 199 | */ |
| 190 | void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb) | 200 | void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb) |
| 191 | { | 201 | { |
| 192 | if (eb->lock_nested) { | 202 | /* |
| 193 | read_lock(&eb->lock); | 203 | * if we're nested, we have the write lock. No new locking |
| 194 | if (eb->lock_nested && current->pid == eb->lock_owner) { | 204 | * is needed as long as we are the lock owner. |
| 195 | eb->lock_nested = 0; | 205 | * The write unlock will do a barrier for us, and the lock_nested |
| 196 | read_unlock(&eb->lock); | 206 | * field only matters to the lock owner. |
| 197 | return; | 207 | */ |
| 198 | } | 208 | if (eb->lock_nested && current->pid == eb->lock_owner) { |
| 199 | read_unlock(&eb->lock); | 209 | eb->lock_nested = 0; |
| 210 | return; | ||
| 200 | } | 211 | } |
| 201 | btrfs_assert_tree_read_locked(eb); | 212 | btrfs_assert_tree_read_locked(eb); |
| 202 | WARN_ON(atomic_read(&eb->blocking_readers) == 0); | 213 | WARN_ON(atomic_read(&eb->blocking_readers) == 0); |
| @@ -244,6 +255,7 @@ void btrfs_tree_unlock(struct extent_buffer *eb) | |||
| 244 | BUG_ON(blockers > 1); | 255 | BUG_ON(blockers > 1); |
| 245 | 256 | ||
| 246 | btrfs_assert_tree_locked(eb); | 257 | btrfs_assert_tree_locked(eb); |
| 258 | eb->lock_owner = 0; | ||
| 247 | atomic_dec(&eb->write_locks); | 259 | atomic_dec(&eb->write_locks); |
| 248 | 260 | ||
| 249 | if (blockers) { | 261 | if (blockers) { |
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 6efd70d3b64f..9626b4ad3b9a 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c | |||
| @@ -54,7 +54,7 @@ static void print_extent_data_ref(struct extent_buffer *eb, | |||
| 54 | btrfs_extent_data_ref_count(eb, ref)); | 54 | btrfs_extent_data_ref_count(eb, ref)); |
| 55 | } | 55 | } |
| 56 | 56 | ||
| 57 | static void print_extent_item(struct extent_buffer *eb, int slot) | 57 | static void print_extent_item(struct extent_buffer *eb, int slot, int type) |
| 58 | { | 58 | { |
| 59 | struct btrfs_extent_item *ei; | 59 | struct btrfs_extent_item *ei; |
| 60 | struct btrfs_extent_inline_ref *iref; | 60 | struct btrfs_extent_inline_ref *iref; |
| @@ -63,7 +63,6 @@ static void print_extent_item(struct extent_buffer *eb, int slot) | |||
| 63 | struct btrfs_disk_key key; | 63 | struct btrfs_disk_key key; |
| 64 | unsigned long end; | 64 | unsigned long end; |
| 65 | unsigned long ptr; | 65 | unsigned long ptr; |
| 66 | int type; | ||
| 67 | u32 item_size = btrfs_item_size_nr(eb, slot); | 66 | u32 item_size = btrfs_item_size_nr(eb, slot); |
| 68 | u64 flags; | 67 | u64 flags; |
| 69 | u64 offset; | 68 | u64 offset; |
| @@ -88,7 +87,8 @@ static void print_extent_item(struct extent_buffer *eb, int slot) | |||
| 88 | btrfs_extent_refs(eb, ei), btrfs_extent_generation(eb, ei), | 87 | btrfs_extent_refs(eb, ei), btrfs_extent_generation(eb, ei), |
| 89 | flags); | 88 | flags); |
| 90 | 89 | ||
| 91 | if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { | 90 | if ((type == BTRFS_EXTENT_ITEM_KEY) && |
| 91 | flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { | ||
| 92 | struct btrfs_tree_block_info *info; | 92 | struct btrfs_tree_block_info *info; |
| 93 | info = (struct btrfs_tree_block_info *)(ei + 1); | 93 | info = (struct btrfs_tree_block_info *)(ei + 1); |
| 94 | btrfs_tree_block_key(eb, info, &key); | 94 | btrfs_tree_block_key(eb, info, &key); |
| @@ -223,7 +223,8 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) | |||
| 223 | btrfs_disk_root_refs(l, ri)); | 223 | btrfs_disk_root_refs(l, ri)); |
| 224 | break; | 224 | break; |
| 225 | case BTRFS_EXTENT_ITEM_KEY: | 225 | case BTRFS_EXTENT_ITEM_KEY: |
| 226 | print_extent_item(l, i); | 226 | case BTRFS_METADATA_ITEM_KEY: |
| 227 | print_extent_item(l, i, type); | ||
| 227 | break; | 228 | break; |
| 228 | case BTRFS_TREE_BLOCK_REF_KEY: | 229 | case BTRFS_TREE_BLOCK_REF_KEY: |
| 229 | printk(KERN_INFO "\t\ttree block backref\n"); | 230 | printk(KERN_INFO "\t\ttree block backref\n"); |
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index cf5aead95a7f..98cb6b2630f9 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c | |||
| @@ -1798,8 +1798,10 @@ static int qgroup_shared_accounting(struct btrfs_trans_handle *trans, | |||
| 1798 | return -ENOMEM; | 1798 | return -ENOMEM; |
| 1799 | 1799 | ||
| 1800 | tmp = ulist_alloc(GFP_NOFS); | 1800 | tmp = ulist_alloc(GFP_NOFS); |
| 1801 | if (!tmp) | 1801 | if (!tmp) { |
| 1802 | ulist_free(qgroups); | ||
| 1802 | return -ENOMEM; | 1803 | return -ENOMEM; |
| 1804 | } | ||
| 1803 | 1805 | ||
| 1804 | btrfs_get_tree_mod_seq(fs_info, &elem); | 1806 | btrfs_get_tree_mod_seq(fs_info, &elem); |
| 1805 | ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr, elem.seq, | 1807 | ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr, elem.seq, |
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 4055291a523e..4a88f073fdd7 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c | |||
| @@ -1956,9 +1956,10 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio) | |||
| 1956 | * pages are going to be uptodate. | 1956 | * pages are going to be uptodate. |
| 1957 | */ | 1957 | */ |
| 1958 | for (stripe = 0; stripe < bbio->num_stripes; stripe++) { | 1958 | for (stripe = 0; stripe < bbio->num_stripes; stripe++) { |
| 1959 | if (rbio->faila == stripe || | 1959 | if (rbio->faila == stripe || rbio->failb == stripe) { |
| 1960 | rbio->failb == stripe) | 1960 | atomic_inc(&rbio->bbio->error); |
| 1961 | continue; | 1961 | continue; |
| 1962 | } | ||
| 1962 | 1963 | ||
| 1963 | for (pagenr = 0; pagenr < nr_pages; pagenr++) { | 1964 | for (pagenr = 0; pagenr < nr_pages; pagenr++) { |
| 1964 | struct page *p; | 1965 | struct page *p; |
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index 30947f923620..09230cf3a244 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c | |||
| @@ -428,8 +428,13 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, | |||
| 428 | continue; | 428 | continue; |
| 429 | } | 429 | } |
| 430 | if (!dev->bdev) { | 430 | if (!dev->bdev) { |
| 431 | /* cannot read ahead on missing device */ | 431 | /* |
| 432 | continue; | 432 | * cannot read ahead on missing device, but for RAID5/6, |
| 433 | * REQ_GET_READ_MIRRORS returns 1. So don't skip missing | ||
| 434 | * device in such a case. | ||
| 435 | */ | ||
| 436 | if (nzones > 1) | ||
| 437 | continue; | ||
| 433 | } | 438 | } |
| 434 | if (dev_replace_is_ongoing && | 439 | if (dev_replace_is_ongoing && |
| 435 | dev == fs_info->dev_replace.tgtdev) { | 440 | dev == fs_info->dev_replace.tgtdev) { |
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index ac80188eec88..b6d198f5181e 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c | |||
| @@ -2725,11 +2725,8 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, | |||
| 2725 | dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); | 2725 | dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); |
| 2726 | length = btrfs_dev_extent_length(l, dev_extent); | 2726 | length = btrfs_dev_extent_length(l, dev_extent); |
| 2727 | 2727 | ||
| 2728 | if (found_key.offset + length <= start) { | 2728 | if (found_key.offset + length <= start) |
| 2729 | key.offset = found_key.offset + length; | 2729 | goto skip; |
| 2730 | btrfs_release_path(path); | ||
| 2731 | continue; | ||
| 2732 | } | ||
| 2733 | 2730 | ||
| 2734 | chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent); | 2731 | chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent); |
| 2735 | chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent); | 2732 | chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent); |
| @@ -2740,10 +2737,12 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, | |||
| 2740 | * the chunk from going away while we scrub it | 2737 | * the chunk from going away while we scrub it |
| 2741 | */ | 2738 | */ |
| 2742 | cache = btrfs_lookup_block_group(fs_info, chunk_offset); | 2739 | cache = btrfs_lookup_block_group(fs_info, chunk_offset); |
| 2743 | if (!cache) { | 2740 | |
| 2744 | ret = -ENOENT; | 2741 | /* some chunks are removed but not committed to disk yet, |
| 2745 | break; | 2742 | * continue scrubbing */ |
| 2746 | } | 2743 | if (!cache) |
| 2744 | goto skip; | ||
| 2745 | |||
| 2747 | dev_replace->cursor_right = found_key.offset + length; | 2746 | dev_replace->cursor_right = found_key.offset + length; |
| 2748 | dev_replace->cursor_left = found_key.offset; | 2747 | dev_replace->cursor_left = found_key.offset; |
| 2749 | dev_replace->item_needs_writeback = 1; | 2748 | dev_replace->item_needs_writeback = 1; |
| @@ -2802,7 +2801,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, | |||
| 2802 | 2801 | ||
| 2803 | dev_replace->cursor_left = dev_replace->cursor_right; | 2802 | dev_replace->cursor_left = dev_replace->cursor_right; |
| 2804 | dev_replace->item_needs_writeback = 1; | 2803 | dev_replace->item_needs_writeback = 1; |
| 2805 | 2804 | skip: | |
| 2806 | key.offset = found_key.offset + length; | 2805 | key.offset = found_key.offset + length; |
| 2807 | btrfs_release_path(path); | 2806 | btrfs_release_path(path); |
| 2808 | } | 2807 | } |
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 4662d92a4b73..8e16bca69c56 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
| @@ -522,9 +522,10 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
| 522 | case Opt_ssd_spread: | 522 | case Opt_ssd_spread: |
| 523 | btrfs_set_and_info(root, SSD_SPREAD, | 523 | btrfs_set_and_info(root, SSD_SPREAD, |
| 524 | "use spread ssd allocation scheme"); | 524 | "use spread ssd allocation scheme"); |
| 525 | btrfs_set_opt(info->mount_opt, SSD); | ||
| 525 | break; | 526 | break; |
| 526 | case Opt_nossd: | 527 | case Opt_nossd: |
| 527 | btrfs_clear_and_info(root, NOSSD, | 528 | btrfs_set_and_info(root, NOSSD, |
| 528 | "not using ssd allocation scheme"); | 529 | "not using ssd allocation scheme"); |
| 529 | btrfs_clear_opt(info->mount_opt, SSD); | 530 | btrfs_clear_opt(info->mount_opt, SSD); |
| 530 | break; | 531 | break; |
| @@ -1467,7 +1468,9 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) | |||
| 1467 | goto restore; | 1468 | goto restore; |
| 1468 | 1469 | ||
| 1469 | /* recover relocation */ | 1470 | /* recover relocation */ |
| 1471 | mutex_lock(&fs_info->cleaner_mutex); | ||
| 1470 | ret = btrfs_recover_relocation(root); | 1472 | ret = btrfs_recover_relocation(root); |
| 1473 | mutex_unlock(&fs_info->cleaner_mutex); | ||
| 1471 | if (ret) | 1474 | if (ret) |
| 1472 | goto restore; | 1475 | goto restore; |
| 1473 | 1476 | ||
| @@ -1808,6 +1811,8 @@ static int btrfs_show_devname(struct seq_file *m, struct dentry *root) | |||
| 1808 | list_for_each_entry(dev, head, dev_list) { | 1811 | list_for_each_entry(dev, head, dev_list) { |
| 1809 | if (dev->missing) | 1812 | if (dev->missing) |
| 1810 | continue; | 1813 | continue; |
| 1814 | if (!dev->name) | ||
| 1815 | continue; | ||
| 1811 | if (!first_dev || dev->devid < first_dev->devid) | 1816 | if (!first_dev || dev->devid < first_dev->devid) |
| 1812 | first_dev = dev; | 1817 | first_dev = dev; |
| 1813 | } | 1818 | } |
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index df39458f1487..78699364f537 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c | |||
| @@ -605,14 +605,37 @@ static void init_feature_attrs(void) | |||
| 605 | } | 605 | } |
| 606 | } | 606 | } |
| 607 | 607 | ||
| 608 | static int add_device_membership(struct btrfs_fs_info *fs_info) | 608 | int btrfs_kobj_rm_device(struct btrfs_fs_info *fs_info, |
| 609 | struct btrfs_device *one_device) | ||
| 610 | { | ||
| 611 | struct hd_struct *disk; | ||
| 612 | struct kobject *disk_kobj; | ||
| 613 | |||
| 614 | if (!fs_info->device_dir_kobj) | ||
| 615 | return -EINVAL; | ||
| 616 | |||
| 617 | if (one_device) { | ||
| 618 | disk = one_device->bdev->bd_part; | ||
| 619 | disk_kobj = &part_to_dev(disk)->kobj; | ||
| 620 | |||
| 621 | sysfs_remove_link(fs_info->device_dir_kobj, | ||
| 622 | disk_kobj->name); | ||
| 623 | } | ||
| 624 | |||
| 625 | return 0; | ||
| 626 | } | ||
| 627 | |||
| 628 | int btrfs_kobj_add_device(struct btrfs_fs_info *fs_info, | ||
| 629 | struct btrfs_device *one_device) | ||
| 609 | { | 630 | { |
| 610 | int error = 0; | 631 | int error = 0; |
| 611 | struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; | 632 | struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; |
| 612 | struct btrfs_device *dev; | 633 | struct btrfs_device *dev; |
| 613 | 634 | ||
| 614 | fs_info->device_dir_kobj = kobject_create_and_add("devices", | 635 | if (!fs_info->device_dir_kobj) |
| 636 | fs_info->device_dir_kobj = kobject_create_and_add("devices", | ||
| 615 | &fs_info->super_kobj); | 637 | &fs_info->super_kobj); |
| 638 | |||
| 616 | if (!fs_info->device_dir_kobj) | 639 | if (!fs_info->device_dir_kobj) |
| 617 | return -ENOMEM; | 640 | return -ENOMEM; |
| 618 | 641 | ||
| @@ -623,6 +646,9 @@ static int add_device_membership(struct btrfs_fs_info *fs_info) | |||
| 623 | if (!dev->bdev) | 646 | if (!dev->bdev) |
| 624 | continue; | 647 | continue; |
| 625 | 648 | ||
| 649 | if (one_device && one_device != dev) | ||
| 650 | continue; | ||
| 651 | |||
| 626 | disk = dev->bdev->bd_part; | 652 | disk = dev->bdev->bd_part; |
| 627 | disk_kobj = &part_to_dev(disk)->kobj; | 653 | disk_kobj = &part_to_dev(disk)->kobj; |
| 628 | 654 | ||
| @@ -666,7 +692,7 @@ int btrfs_sysfs_add_one(struct btrfs_fs_info *fs_info) | |||
| 666 | if (error) | 692 | if (error) |
| 667 | goto failure; | 693 | goto failure; |
| 668 | 694 | ||
| 669 | error = add_device_membership(fs_info); | 695 | error = btrfs_kobj_add_device(fs_info, NULL); |
| 670 | if (error) | 696 | if (error) |
| 671 | goto failure; | 697 | goto failure; |
| 672 | 698 | ||
diff --git a/fs/btrfs/sysfs.h b/fs/btrfs/sysfs.h index 9ab576318a84..ac46df37504c 100644 --- a/fs/btrfs/sysfs.h +++ b/fs/btrfs/sysfs.h | |||
| @@ -66,4 +66,8 @@ char *btrfs_printable_features(enum btrfs_feature_set set, u64 flags); | |||
| 66 | extern const char * const btrfs_feature_set_names[3]; | 66 | extern const char * const btrfs_feature_set_names[3]; |
| 67 | extern struct kobj_type space_info_ktype; | 67 | extern struct kobj_type space_info_ktype; |
| 68 | extern struct kobj_type btrfs_raid_ktype; | 68 | extern struct kobj_type btrfs_raid_ktype; |
| 69 | int btrfs_kobj_add_device(struct btrfs_fs_info *fs_info, | ||
| 70 | struct btrfs_device *one_device); | ||
| 71 | int btrfs_kobj_rm_device(struct btrfs_fs_info *fs_info, | ||
| 72 | struct btrfs_device *one_device); | ||
| 69 | #endif /* _BTRFS_SYSFS_H_ */ | 73 | #endif /* _BTRFS_SYSFS_H_ */ |
diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c index a5dcacb5df9c..9626252ee6b4 100644 --- a/fs/btrfs/tests/btrfs-tests.c +++ b/fs/btrfs/tests/btrfs-tests.c | |||
| @@ -135,7 +135,7 @@ restart: | |||
| 135 | radix_tree_for_each_slot(slot, &fs_info->buffer_radix, &iter, 0) { | 135 | radix_tree_for_each_slot(slot, &fs_info->buffer_radix, &iter, 0) { |
| 136 | struct extent_buffer *eb; | 136 | struct extent_buffer *eb; |
| 137 | 137 | ||
| 138 | eb = radix_tree_deref_slot(slot); | 138 | eb = radix_tree_deref_slot_protected(slot, &fs_info->buffer_lock); |
| 139 | if (!eb) | 139 | if (!eb) |
| 140 | continue; | 140 | continue; |
| 141 | /* Shouldn't happen but that kind of thinking creates CVE's */ | 141 | /* Shouldn't happen but that kind of thinking creates CVE's */ |
diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c index fa691b754aaf..ec3dcb202357 100644 --- a/fs/btrfs/tests/qgroup-tests.c +++ b/fs/btrfs/tests/qgroup-tests.c | |||
| @@ -415,6 +415,8 @@ int btrfs_test_qgroups(void) | |||
| 415 | ret = -ENOMEM; | 415 | ret = -ENOMEM; |
| 416 | goto out; | 416 | goto out; |
| 417 | } | 417 | } |
| 418 | btrfs_set_header_level(root->node, 0); | ||
| 419 | btrfs_set_header_nritems(root->node, 0); | ||
| 418 | root->alloc_bytenr += 8192; | 420 | root->alloc_bytenr += 8192; |
| 419 | 421 | ||
| 420 | tmp_root = btrfs_alloc_dummy_root(); | 422 | tmp_root = btrfs_alloc_dummy_root(); |
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 9630f10f8e1e..5f379affdf23 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
| @@ -386,11 +386,13 @@ start_transaction(struct btrfs_root *root, u64 num_items, unsigned int type, | |||
| 386 | bool reloc_reserved = false; | 386 | bool reloc_reserved = false; |
| 387 | int ret; | 387 | int ret; |
| 388 | 388 | ||
| 389 | /* Send isn't supposed to start transactions. */ | ||
| 390 | ASSERT(current->journal_info != (void *)BTRFS_SEND_TRANS_STUB); | ||
| 391 | |||
| 389 | if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) | 392 | if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) |
| 390 | return ERR_PTR(-EROFS); | 393 | return ERR_PTR(-EROFS); |
| 391 | 394 | ||
| 392 | if (current->journal_info && | 395 | if (current->journal_info) { |
| 393 | current->journal_info != (void *)BTRFS_SEND_TRANS_STUB) { | ||
| 394 | WARN_ON(type & TRANS_EXTWRITERS); | 396 | WARN_ON(type & TRANS_EXTWRITERS); |
| 395 | h = current->journal_info; | 397 | h = current->journal_info; |
| 396 | h->use_count++; | 398 | h->use_count++; |
| @@ -491,6 +493,7 @@ again: | |||
| 491 | smp_mb(); | 493 | smp_mb(); |
| 492 | if (cur_trans->state >= TRANS_STATE_BLOCKED && | 494 | if (cur_trans->state >= TRANS_STATE_BLOCKED && |
| 493 | may_wait_transaction(root, type)) { | 495 | may_wait_transaction(root, type)) { |
| 496 | current->journal_info = h; | ||
| 494 | btrfs_commit_transaction(h, root); | 497 | btrfs_commit_transaction(h, root); |
| 495 | goto again; | 498 | goto again; |
| 496 | } | 499 | } |
| @@ -1284,11 +1287,13 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
| 1284 | goto fail; | 1287 | goto fail; |
| 1285 | } | 1288 | } |
| 1286 | 1289 | ||
| 1287 | pending->error = btrfs_qgroup_inherit(trans, fs_info, | 1290 | ret = btrfs_qgroup_inherit(trans, fs_info, |
| 1288 | root->root_key.objectid, | 1291 | root->root_key.objectid, |
| 1289 | objectid, pending->inherit); | 1292 | objectid, pending->inherit); |
| 1290 | if (pending->error) | 1293 | if (ret) { |
| 1291 | goto no_free_objectid; | 1294 | btrfs_abort_transaction(trans, root, ret); |
| 1295 | goto fail; | ||
| 1296 | } | ||
| 1292 | 1297 | ||
| 1293 | /* see comments in should_cow_block() */ | 1298 | /* see comments in should_cow_block() */ |
| 1294 | set_bit(BTRFS_ROOT_FORCE_COW, &root->state); | 1299 | set_bit(BTRFS_ROOT_FORCE_COW, &root->state); |
| @@ -1613,11 +1618,6 @@ static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans, | |||
| 1613 | int ret; | 1618 | int ret; |
| 1614 | 1619 | ||
| 1615 | ret = btrfs_run_delayed_items(trans, root); | 1620 | ret = btrfs_run_delayed_items(trans, root); |
| 1616 | /* | ||
| 1617 | * running the delayed items may have added new refs. account | ||
| 1618 | * them now so that they hinder processing of more delayed refs | ||
| 1619 | * as little as possible. | ||
| 1620 | */ | ||
| 1621 | if (ret) | 1621 | if (ret) |
| 1622 | return ret; | 1622 | return ret; |
| 1623 | 1623 | ||
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index ffeed6d6326f..6104676857f5 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
| @@ -40,6 +40,7 @@ | |||
| 40 | #include "rcu-string.h" | 40 | #include "rcu-string.h" |
| 41 | #include "math.h" | 41 | #include "math.h" |
| 42 | #include "dev-replace.h" | 42 | #include "dev-replace.h" |
| 43 | #include "sysfs.h" | ||
| 43 | 44 | ||
| 44 | static int init_first_rw_device(struct btrfs_trans_handle *trans, | 45 | static int init_first_rw_device(struct btrfs_trans_handle *trans, |
| 45 | struct btrfs_root *root, | 46 | struct btrfs_root *root, |
| @@ -554,12 +555,14 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) | |||
| 554 | * This is ok to do without rcu read locked because we hold the | 555 | * This is ok to do without rcu read locked because we hold the |
| 555 | * uuid mutex so nothing we touch in here is going to disappear. | 556 | * uuid mutex so nothing we touch in here is going to disappear. |
| 556 | */ | 557 | */ |
| 557 | name = rcu_string_strdup(orig_dev->name->str, GFP_NOFS); | 558 | if (orig_dev->name) { |
| 558 | if (!name) { | 559 | name = rcu_string_strdup(orig_dev->name->str, GFP_NOFS); |
| 559 | kfree(device); | 560 | if (!name) { |
| 560 | goto error; | 561 | kfree(device); |
| 562 | goto error; | ||
| 563 | } | ||
| 564 | rcu_assign_pointer(device->name, name); | ||
| 561 | } | 565 | } |
| 562 | rcu_assign_pointer(device->name, name); | ||
| 563 | 566 | ||
| 564 | list_add(&device->dev_list, &fs_devices->devices); | 567 | list_add(&device->dev_list, &fs_devices->devices); |
| 565 | device->fs_devices = fs_devices; | 568 | device->fs_devices = fs_devices; |
| @@ -1680,6 +1683,9 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
| 1680 | if (device->bdev) | 1683 | if (device->bdev) |
| 1681 | device->fs_devices->open_devices--; | 1684 | device->fs_devices->open_devices--; |
| 1682 | 1685 | ||
| 1686 | /* remove sysfs entry */ | ||
| 1687 | btrfs_kobj_rm_device(root->fs_info, device); | ||
| 1688 | |||
| 1683 | call_rcu(&device->rcu, free_device); | 1689 | call_rcu(&device->rcu, free_device); |
| 1684 | 1690 | ||
| 1685 | num_devices = btrfs_super_num_devices(root->fs_info->super_copy) - 1; | 1691 | num_devices = btrfs_super_num_devices(root->fs_info->super_copy) - 1; |
| @@ -2143,9 +2149,14 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
| 2143 | total_bytes = btrfs_super_num_devices(root->fs_info->super_copy); | 2149 | total_bytes = btrfs_super_num_devices(root->fs_info->super_copy); |
| 2144 | btrfs_set_super_num_devices(root->fs_info->super_copy, | 2150 | btrfs_set_super_num_devices(root->fs_info->super_copy, |
| 2145 | total_bytes + 1); | 2151 | total_bytes + 1); |
| 2152 | |||
| 2153 | /* add sysfs device entry */ | ||
| 2154 | btrfs_kobj_add_device(root->fs_info, device); | ||
| 2155 | |||
| 2146 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | 2156 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); |
| 2147 | 2157 | ||
| 2148 | if (seeding_dev) { | 2158 | if (seeding_dev) { |
| 2159 | char fsid_buf[BTRFS_UUID_UNPARSED_SIZE]; | ||
| 2149 | ret = init_first_rw_device(trans, root, device); | 2160 | ret = init_first_rw_device(trans, root, device); |
| 2150 | if (ret) { | 2161 | if (ret) { |
| 2151 | btrfs_abort_transaction(trans, root, ret); | 2162 | btrfs_abort_transaction(trans, root, ret); |
| @@ -2156,6 +2167,14 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
| 2156 | btrfs_abort_transaction(trans, root, ret); | 2167 | btrfs_abort_transaction(trans, root, ret); |
| 2157 | goto error_trans; | 2168 | goto error_trans; |
| 2158 | } | 2169 | } |
| 2170 | |||
| 2171 | /* Sprouting would change fsid of the mounted root, | ||
| 2172 | * so rename the fsid on the sysfs | ||
| 2173 | */ | ||
| 2174 | snprintf(fsid_buf, BTRFS_UUID_UNPARSED_SIZE, "%pU", | ||
| 2175 | root->fs_info->fsid); | ||
| 2176 | if (kobject_rename(&root->fs_info->super_kobj, fsid_buf)) | ||
| 2177 | goto error_trans; | ||
| 2159 | } else { | 2178 | } else { |
| 2160 | ret = btrfs_add_device(trans, root, device); | 2179 | ret = btrfs_add_device(trans, root, device); |
| 2161 | if (ret) { | 2180 | if (ret) { |
| @@ -2205,6 +2224,7 @@ error_trans: | |||
| 2205 | unlock_chunks(root); | 2224 | unlock_chunks(root); |
| 2206 | btrfs_end_transaction(trans, root); | 2225 | btrfs_end_transaction(trans, root); |
| 2207 | rcu_string_free(device->name); | 2226 | rcu_string_free(device->name); |
| 2227 | btrfs_kobj_rm_device(root->fs_info, device); | ||
| 2208 | kfree(device); | 2228 | kfree(device); |
| 2209 | error: | 2229 | error: |
| 2210 | blkdev_put(bdev, FMODE_EXCL); | 2230 | blkdev_put(bdev, FMODE_EXCL); |
| @@ -2543,9 +2563,6 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, | |||
| 2543 | remove_extent_mapping(em_tree, em); | 2563 | remove_extent_mapping(em_tree, em); |
| 2544 | write_unlock(&em_tree->lock); | 2564 | write_unlock(&em_tree->lock); |
| 2545 | 2565 | ||
| 2546 | kfree(map); | ||
| 2547 | em->bdev = NULL; | ||
| 2548 | |||
| 2549 | /* once for the tree */ | 2566 | /* once for the tree */ |
| 2550 | free_extent_map(em); | 2567 | free_extent_map(em); |
| 2551 | /* once for us */ | 2568 | /* once for us */ |
| @@ -4301,9 +4318,11 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | |||
| 4301 | 4318 | ||
| 4302 | em = alloc_extent_map(); | 4319 | em = alloc_extent_map(); |
| 4303 | if (!em) { | 4320 | if (!em) { |
| 4321 | kfree(map); | ||
| 4304 | ret = -ENOMEM; | 4322 | ret = -ENOMEM; |
| 4305 | goto error; | 4323 | goto error; |
| 4306 | } | 4324 | } |
| 4325 | set_bit(EXTENT_FLAG_FS_MAPPING, &em->flags); | ||
| 4307 | em->bdev = (struct block_device *)map; | 4326 | em->bdev = (struct block_device *)map; |
| 4308 | em->start = start; | 4327 | em->start = start; |
| 4309 | em->len = num_bytes; | 4328 | em->len = num_bytes; |
| @@ -4346,7 +4365,6 @@ error_del_extent: | |||
| 4346 | /* One for the tree reference */ | 4365 | /* One for the tree reference */ |
| 4347 | free_extent_map(em); | 4366 | free_extent_map(em); |
| 4348 | error: | 4367 | error: |
| 4349 | kfree(map); | ||
| 4350 | kfree(devices_info); | 4368 | kfree(devices_info); |
| 4351 | return ret; | 4369 | return ret; |
| 4352 | } | 4370 | } |
| @@ -4558,7 +4576,6 @@ void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree) | |||
| 4558 | write_unlock(&tree->map_tree.lock); | 4576 | write_unlock(&tree->map_tree.lock); |
| 4559 | if (!em) | 4577 | if (!em) |
| 4560 | break; | 4578 | break; |
| 4561 | kfree(em->bdev); | ||
| 4562 | /* once for us */ | 4579 | /* once for us */ |
| 4563 | free_extent_map(em); | 4580 | free_extent_map(em); |
| 4564 | /* once for the tree */ | 4581 | /* once for the tree */ |
| @@ -5362,6 +5379,15 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, | |||
| 5362 | return 0; | 5379 | return 0; |
| 5363 | } | 5380 | } |
| 5364 | 5381 | ||
| 5382 | static inline void btrfs_end_bbio(struct btrfs_bio *bbio, struct bio *bio, int err) | ||
| 5383 | { | ||
| 5384 | if (likely(bbio->flags & BTRFS_BIO_ORIG_BIO_SUBMITTED)) | ||
| 5385 | bio_endio_nodec(bio, err); | ||
| 5386 | else | ||
| 5387 | bio_endio(bio, err); | ||
| 5388 | kfree(bbio); | ||
| 5389 | } | ||
| 5390 | |||
| 5365 | static void btrfs_end_bio(struct bio *bio, int err) | 5391 | static void btrfs_end_bio(struct bio *bio, int err) |
| 5366 | { | 5392 | { |
| 5367 | struct btrfs_bio *bbio = bio->bi_private; | 5393 | struct btrfs_bio *bbio = bio->bi_private; |
| @@ -5402,12 +5428,6 @@ static void btrfs_end_bio(struct bio *bio, int err) | |||
| 5402 | bio = bbio->orig_bio; | 5428 | bio = bbio->orig_bio; |
| 5403 | } | 5429 | } |
| 5404 | 5430 | ||
| 5405 | /* | ||
| 5406 | * We have original bio now. So increment bi_remaining to | ||
| 5407 | * account for it in endio | ||
| 5408 | */ | ||
| 5409 | atomic_inc(&bio->bi_remaining); | ||
| 5410 | |||
| 5411 | bio->bi_private = bbio->private; | 5431 | bio->bi_private = bbio->private; |
| 5412 | bio->bi_end_io = bbio->end_io; | 5432 | bio->bi_end_io = bbio->end_io; |
| 5413 | btrfs_io_bio(bio)->mirror_num = bbio->mirror_num; | 5433 | btrfs_io_bio(bio)->mirror_num = bbio->mirror_num; |
| @@ -5424,9 +5444,8 @@ static void btrfs_end_bio(struct bio *bio, int err) | |||
| 5424 | set_bit(BIO_UPTODATE, &bio->bi_flags); | 5444 | set_bit(BIO_UPTODATE, &bio->bi_flags); |
| 5425 | err = 0; | 5445 | err = 0; |
| 5426 | } | 5446 | } |
| 5427 | kfree(bbio); | ||
| 5428 | 5447 | ||
| 5429 | bio_endio(bio, err); | 5448 | btrfs_end_bbio(bbio, bio, err); |
| 5430 | } else if (!is_orig_bio) { | 5449 | } else if (!is_orig_bio) { |
| 5431 | bio_put(bio); | 5450 | bio_put(bio); |
| 5432 | } | 5451 | } |
| @@ -5589,12 +5608,15 @@ static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical) | |||
| 5589 | { | 5608 | { |
| 5590 | atomic_inc(&bbio->error); | 5609 | atomic_inc(&bbio->error); |
| 5591 | if (atomic_dec_and_test(&bbio->stripes_pending)) { | 5610 | if (atomic_dec_and_test(&bbio->stripes_pending)) { |
| 5611 | /* Shoud be the original bio. */ | ||
| 5612 | WARN_ON(bio != bbio->orig_bio); | ||
| 5613 | |||
| 5592 | bio->bi_private = bbio->private; | 5614 | bio->bi_private = bbio->private; |
| 5593 | bio->bi_end_io = bbio->end_io; | 5615 | bio->bi_end_io = bbio->end_io; |
| 5594 | btrfs_io_bio(bio)->mirror_num = bbio->mirror_num; | 5616 | btrfs_io_bio(bio)->mirror_num = bbio->mirror_num; |
| 5595 | bio->bi_iter.bi_sector = logical >> 9; | 5617 | bio->bi_iter.bi_sector = logical >> 9; |
| 5596 | kfree(bbio); | 5618 | |
| 5597 | bio_endio(bio, -EIO); | 5619 | btrfs_end_bbio(bbio, bio, -EIO); |
| 5598 | } | 5620 | } |
| 5599 | } | 5621 | } |
| 5600 | 5622 | ||
| @@ -5681,6 +5703,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, | |||
| 5681 | BUG_ON(!bio); /* -ENOMEM */ | 5703 | BUG_ON(!bio); /* -ENOMEM */ |
| 5682 | } else { | 5704 | } else { |
| 5683 | bio = first_bio; | 5705 | bio = first_bio; |
| 5706 | bbio->flags |= BTRFS_BIO_ORIG_BIO_SUBMITTED; | ||
| 5684 | } | 5707 | } |
| 5685 | 5708 | ||
| 5686 | submit_stripe_bio(root, bbio, bio, | 5709 | submit_stripe_bio(root, bbio, bio, |
| @@ -5822,6 +5845,7 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, | |||
| 5822 | return -ENOMEM; | 5845 | return -ENOMEM; |
| 5823 | } | 5846 | } |
| 5824 | 5847 | ||
| 5848 | set_bit(EXTENT_FLAG_FS_MAPPING, &em->flags); | ||
| 5825 | em->bdev = (struct block_device *)map; | 5849 | em->bdev = (struct block_device *)map; |
| 5826 | em->start = logical; | 5850 | em->start = logical; |
| 5827 | em->len = length; | 5851 | em->len = length; |
| @@ -5846,7 +5870,6 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, | |||
| 5846 | map->stripes[i].dev = btrfs_find_device(root->fs_info, devid, | 5870 | map->stripes[i].dev = btrfs_find_device(root->fs_info, devid, |
| 5847 | uuid, NULL); | 5871 | uuid, NULL); |
| 5848 | if (!map->stripes[i].dev && !btrfs_test_opt(root, DEGRADED)) { | 5872 | if (!map->stripes[i].dev && !btrfs_test_opt(root, DEGRADED)) { |
| 5849 | kfree(map); | ||
| 5850 | free_extent_map(em); | 5873 | free_extent_map(em); |
| 5851 | return -EIO; | 5874 | return -EIO; |
| 5852 | } | 5875 | } |
| @@ -5854,7 +5877,6 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, | |||
| 5854 | map->stripes[i].dev = | 5877 | map->stripes[i].dev = |
| 5855 | add_missing_dev(root, devid, uuid); | 5878 | add_missing_dev(root, devid, uuid); |
| 5856 | if (!map->stripes[i].dev) { | 5879 | if (!map->stripes[i].dev) { |
| 5857 | kfree(map); | ||
| 5858 | free_extent_map(em); | 5880 | free_extent_map(em); |
| 5859 | return -EIO; | 5881 | return -EIO; |
| 5860 | } | 5882 | } |
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 1a15bbeb65e2..2aaa00c47816 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h | |||
| @@ -190,11 +190,14 @@ struct btrfs_bio_stripe { | |||
| 190 | struct btrfs_bio; | 190 | struct btrfs_bio; |
| 191 | typedef void (btrfs_bio_end_io_t) (struct btrfs_bio *bio, int err); | 191 | typedef void (btrfs_bio_end_io_t) (struct btrfs_bio *bio, int err); |
| 192 | 192 | ||
| 193 | #define BTRFS_BIO_ORIG_BIO_SUBMITTED 0x1 | ||
| 194 | |||
| 193 | struct btrfs_bio { | 195 | struct btrfs_bio { |
| 194 | atomic_t stripes_pending; | 196 | atomic_t stripes_pending; |
| 195 | struct btrfs_fs_info *fs_info; | 197 | struct btrfs_fs_info *fs_info; |
| 196 | bio_end_io_t *end_io; | 198 | bio_end_io_t *end_io; |
| 197 | struct bio *orig_bio; | 199 | struct bio *orig_bio; |
| 200 | unsigned long flags; | ||
| 198 | void *private; | 201 | void *private; |
| 199 | atomic_t error; | 202 | atomic_t error; |
| 200 | int max_errors; | 203 | int max_errors; |
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c index 4f196314c0c1..b67d8fc81277 100644 --- a/fs/btrfs/zlib.c +++ b/fs/btrfs/zlib.c | |||
| @@ -136,7 +136,7 @@ static int zlib_compress_pages(struct list_head *ws, | |||
| 136 | if (workspace->def_strm.total_in > 8192 && | 136 | if (workspace->def_strm.total_in > 8192 && |
| 137 | workspace->def_strm.total_in < | 137 | workspace->def_strm.total_in < |
| 138 | workspace->def_strm.total_out) { | 138 | workspace->def_strm.total_out) { |
| 139 | ret = -EIO; | 139 | ret = -E2BIG; |
| 140 | goto out; | 140 | goto out; |
| 141 | } | 141 | } |
| 142 | /* we need another page for writing out. Test this | 142 | /* we need another page for writing out. Test this |
diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c index 21887d63dad5..469f2e8657e8 100644 --- a/fs/ceph/acl.c +++ b/fs/ceph/acl.c | |||
| @@ -104,12 +104,6 @@ int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type) | |||
| 104 | umode_t new_mode = inode->i_mode, old_mode = inode->i_mode; | 104 | umode_t new_mode = inode->i_mode, old_mode = inode->i_mode; |
| 105 | struct dentry *dentry; | 105 | struct dentry *dentry; |
| 106 | 106 | ||
| 107 | if (acl) { | ||
| 108 | ret = posix_acl_valid(acl); | ||
| 109 | if (ret < 0) | ||
| 110 | goto out; | ||
| 111 | } | ||
| 112 | |||
| 113 | switch (type) { | 107 | switch (type) { |
| 114 | case ACL_TYPE_ACCESS: | 108 | case ACL_TYPE_ACCESS: |
| 115 | name = POSIX_ACL_XATTR_ACCESS; | 109 | name = POSIX_ACL_XATTR_ACCESS; |
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 65a30e817dd8..90b3954d48ed 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c | |||
| @@ -211,18 +211,15 @@ static int readpage_nounlock(struct file *filp, struct page *page) | |||
| 211 | SetPageError(page); | 211 | SetPageError(page); |
| 212 | ceph_fscache_readpage_cancel(inode, page); | 212 | ceph_fscache_readpage_cancel(inode, page); |
| 213 | goto out; | 213 | goto out; |
| 214 | } else { | ||
| 215 | if (err < PAGE_CACHE_SIZE) { | ||
| 216 | /* zero fill remainder of page */ | ||
| 217 | zero_user_segment(page, err, PAGE_CACHE_SIZE); | ||
| 218 | } else { | ||
| 219 | flush_dcache_page(page); | ||
| 220 | } | ||
| 221 | } | 214 | } |
| 222 | SetPageUptodate(page); | 215 | if (err < PAGE_CACHE_SIZE) |
| 216 | /* zero fill remainder of page */ | ||
| 217 | zero_user_segment(page, err, PAGE_CACHE_SIZE); | ||
| 218 | else | ||
| 219 | flush_dcache_page(page); | ||
| 223 | 220 | ||
| 224 | if (err >= 0) | 221 | SetPageUptodate(page); |
| 225 | ceph_readpage_to_fscache(inode, page); | 222 | ceph_readpage_to_fscache(inode, page); |
| 226 | 223 | ||
| 227 | out: | 224 | out: |
| 228 | return err < 0 ? err : 0; | 225 | return err < 0 ? err : 0; |
| @@ -1187,8 +1184,8 @@ static int ceph_write_end(struct file *file, struct address_space *mapping, | |||
| 1187 | * never get called. | 1184 | * never get called. |
| 1188 | */ | 1185 | */ |
| 1189 | static ssize_t ceph_direct_io(int rw, struct kiocb *iocb, | 1186 | static ssize_t ceph_direct_io(int rw, struct kiocb *iocb, |
| 1190 | const struct iovec *iov, | 1187 | struct iov_iter *iter, |
| 1191 | loff_t pos, unsigned long nr_segs) | 1188 | loff_t pos) |
| 1192 | { | 1189 | { |
| 1193 | WARN_ON(1); | 1190 | WARN_ON(1); |
| 1194 | return -EINVAL; | 1191 | return -EINVAL; |
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index c561b628ebce..1fde164b74b5 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
| @@ -221,8 +221,8 @@ int ceph_unreserve_caps(struct ceph_mds_client *mdsc, | |||
| 221 | return 0; | 221 | return 0; |
| 222 | } | 222 | } |
| 223 | 223 | ||
| 224 | static struct ceph_cap *get_cap(struct ceph_mds_client *mdsc, | 224 | struct ceph_cap *ceph_get_cap(struct ceph_mds_client *mdsc, |
| 225 | struct ceph_cap_reservation *ctx) | 225 | struct ceph_cap_reservation *ctx) |
| 226 | { | 226 | { |
| 227 | struct ceph_cap *cap = NULL; | 227 | struct ceph_cap *cap = NULL; |
| 228 | 228 | ||
| @@ -508,15 +508,14 @@ static void __check_cap_issue(struct ceph_inode_info *ci, struct ceph_cap *cap, | |||
| 508 | * it is < 0. (This is so we can atomically add the cap and add an | 508 | * it is < 0. (This is so we can atomically add the cap and add an |
| 509 | * open file reference to it.) | 509 | * open file reference to it.) |
| 510 | */ | 510 | */ |
| 511 | int ceph_add_cap(struct inode *inode, | 511 | void ceph_add_cap(struct inode *inode, |
| 512 | struct ceph_mds_session *session, u64 cap_id, | 512 | struct ceph_mds_session *session, u64 cap_id, |
| 513 | int fmode, unsigned issued, unsigned wanted, | 513 | int fmode, unsigned issued, unsigned wanted, |
| 514 | unsigned seq, unsigned mseq, u64 realmino, int flags, | 514 | unsigned seq, unsigned mseq, u64 realmino, int flags, |
| 515 | struct ceph_cap_reservation *caps_reservation) | 515 | struct ceph_cap **new_cap) |
| 516 | { | 516 | { |
| 517 | struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; | 517 | struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; |
| 518 | struct ceph_inode_info *ci = ceph_inode(inode); | 518 | struct ceph_inode_info *ci = ceph_inode(inode); |
| 519 | struct ceph_cap *new_cap = NULL; | ||
| 520 | struct ceph_cap *cap; | 519 | struct ceph_cap *cap; |
| 521 | int mds = session->s_mds; | 520 | int mds = session->s_mds; |
| 522 | int actual_wanted; | 521 | int actual_wanted; |
| @@ -531,20 +530,10 @@ int ceph_add_cap(struct inode *inode, | |||
| 531 | if (fmode >= 0) | 530 | if (fmode >= 0) |
| 532 | wanted |= ceph_caps_for_mode(fmode); | 531 | wanted |= ceph_caps_for_mode(fmode); |
| 533 | 532 | ||
| 534 | retry: | ||
| 535 | spin_lock(&ci->i_ceph_lock); | ||
| 536 | cap = __get_cap_for_mds(ci, mds); | 533 | cap = __get_cap_for_mds(ci, mds); |
| 537 | if (!cap) { | 534 | if (!cap) { |
| 538 | if (new_cap) { | 535 | cap = *new_cap; |
| 539 | cap = new_cap; | 536 | *new_cap = NULL; |
| 540 | new_cap = NULL; | ||
| 541 | } else { | ||
| 542 | spin_unlock(&ci->i_ceph_lock); | ||
| 543 | new_cap = get_cap(mdsc, caps_reservation); | ||
| 544 | if (new_cap == NULL) | ||
| 545 | return -ENOMEM; | ||
| 546 | goto retry; | ||
| 547 | } | ||
| 548 | 537 | ||
| 549 | cap->issued = 0; | 538 | cap->issued = 0; |
| 550 | cap->implemented = 0; | 539 | cap->implemented = 0; |
| @@ -562,9 +551,6 @@ retry: | |||
| 562 | session->s_nr_caps++; | 551 | session->s_nr_caps++; |
| 563 | spin_unlock(&session->s_cap_lock); | 552 | spin_unlock(&session->s_cap_lock); |
| 564 | } else { | 553 | } else { |
| 565 | if (new_cap) | ||
| 566 | ceph_put_cap(mdsc, new_cap); | ||
| 567 | |||
| 568 | /* | 554 | /* |
| 569 | * auth mds of the inode changed. we received the cap export | 555 | * auth mds of the inode changed. we received the cap export |
| 570 | * message, but still haven't received the cap import message. | 556 | * message, but still haven't received the cap import message. |
| @@ -626,7 +612,6 @@ retry: | |||
| 626 | ci->i_auth_cap = cap; | 612 | ci->i_auth_cap = cap; |
| 627 | cap->mds_wanted = wanted; | 613 | cap->mds_wanted = wanted; |
| 628 | } | 614 | } |
| 629 | ci->i_cap_exporting_issued = 0; | ||
| 630 | } else { | 615 | } else { |
| 631 | WARN_ON(ci->i_auth_cap == cap); | 616 | WARN_ON(ci->i_auth_cap == cap); |
| 632 | } | 617 | } |
| @@ -648,9 +633,6 @@ retry: | |||
| 648 | 633 | ||
| 649 | if (fmode >= 0) | 634 | if (fmode >= 0) |
| 650 | __ceph_get_fmode(ci, fmode); | 635 | __ceph_get_fmode(ci, fmode); |
| 651 | spin_unlock(&ci->i_ceph_lock); | ||
| 652 | wake_up_all(&ci->i_cap_wq); | ||
| 653 | return 0; | ||
| 654 | } | 636 | } |
| 655 | 637 | ||
| 656 | /* | 638 | /* |
| @@ -685,7 +667,7 @@ static int __cap_is_valid(struct ceph_cap *cap) | |||
| 685 | */ | 667 | */ |
| 686 | int __ceph_caps_issued(struct ceph_inode_info *ci, int *implemented) | 668 | int __ceph_caps_issued(struct ceph_inode_info *ci, int *implemented) |
| 687 | { | 669 | { |
| 688 | int have = ci->i_snap_caps | ci->i_cap_exporting_issued; | 670 | int have = ci->i_snap_caps; |
| 689 | struct ceph_cap *cap; | 671 | struct ceph_cap *cap; |
| 690 | struct rb_node *p; | 672 | struct rb_node *p; |
| 691 | 673 | ||
| @@ -900,7 +882,7 @@ int __ceph_caps_mds_wanted(struct ceph_inode_info *ci) | |||
| 900 | */ | 882 | */ |
| 901 | static int __ceph_is_any_caps(struct ceph_inode_info *ci) | 883 | static int __ceph_is_any_caps(struct ceph_inode_info *ci) |
| 902 | { | 884 | { |
| 903 | return !RB_EMPTY_ROOT(&ci->i_caps) || ci->i_cap_exporting_issued; | 885 | return !RB_EMPTY_ROOT(&ci->i_caps); |
| 904 | } | 886 | } |
| 905 | 887 | ||
| 906 | int ceph_is_any_caps(struct inode *inode) | 888 | int ceph_is_any_caps(struct inode *inode) |
| @@ -2397,32 +2379,30 @@ static void invalidate_aliases(struct inode *inode) | |||
| 2397 | * actually be a revocation if it specifies a smaller cap set.) | 2379 | * actually be a revocation if it specifies a smaller cap set.) |
| 2398 | * | 2380 | * |
| 2399 | * caller holds s_mutex and i_ceph_lock, we drop both. | 2381 | * caller holds s_mutex and i_ceph_lock, we drop both. |
| 2400 | * | ||
| 2401 | * return value: | ||
| 2402 | * 0 - ok | ||
| 2403 | * 1 - check_caps on auth cap only (writeback) | ||
| 2404 | * 2 - check_caps (ack revoke) | ||
| 2405 | */ | 2382 | */ |
| 2406 | static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, | 2383 | static void handle_cap_grant(struct ceph_mds_client *mdsc, |
| 2384 | struct inode *inode, struct ceph_mds_caps *grant, | ||
| 2385 | void *snaptrace, int snaptrace_len, | ||
| 2386 | struct ceph_buffer *xattr_buf, | ||
| 2407 | struct ceph_mds_session *session, | 2387 | struct ceph_mds_session *session, |
| 2408 | struct ceph_cap *cap, | 2388 | struct ceph_cap *cap, int issued) |
| 2409 | struct ceph_buffer *xattr_buf) | 2389 | __releases(ci->i_ceph_lock) |
| 2410 | __releases(ci->i_ceph_lock) | ||
| 2411 | { | 2390 | { |
| 2412 | struct ceph_inode_info *ci = ceph_inode(inode); | 2391 | struct ceph_inode_info *ci = ceph_inode(inode); |
| 2413 | int mds = session->s_mds; | 2392 | int mds = session->s_mds; |
| 2414 | int seq = le32_to_cpu(grant->seq); | 2393 | int seq = le32_to_cpu(grant->seq); |
| 2415 | int newcaps = le32_to_cpu(grant->caps); | 2394 | int newcaps = le32_to_cpu(grant->caps); |
| 2416 | int issued, implemented, used, wanted, dirty; | 2395 | int used, wanted, dirty; |
| 2417 | u64 size = le64_to_cpu(grant->size); | 2396 | u64 size = le64_to_cpu(grant->size); |
| 2418 | u64 max_size = le64_to_cpu(grant->max_size); | 2397 | u64 max_size = le64_to_cpu(grant->max_size); |
| 2419 | struct timespec mtime, atime, ctime; | 2398 | struct timespec mtime, atime, ctime; |
| 2420 | int check_caps = 0; | 2399 | int check_caps = 0; |
| 2421 | int wake = 0; | 2400 | bool wake = 0; |
| 2422 | int writeback = 0; | 2401 | bool writeback = 0; |
| 2423 | int queue_invalidate = 0; | 2402 | bool queue_trunc = 0; |
| 2424 | int deleted_inode = 0; | 2403 | bool queue_invalidate = 0; |
| 2425 | int queue_revalidate = 0; | 2404 | bool queue_revalidate = 0; |
| 2405 | bool deleted_inode = 0; | ||
| 2426 | 2406 | ||
| 2427 | dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n", | 2407 | dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n", |
| 2428 | inode, cap, mds, seq, ceph_cap_string(newcaps)); | 2408 | inode, cap, mds, seq, ceph_cap_string(newcaps)); |
| @@ -2466,16 +2446,13 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, | |||
| 2466 | } | 2446 | } |
| 2467 | 2447 | ||
| 2468 | /* side effects now are allowed */ | 2448 | /* side effects now are allowed */ |
| 2469 | |||
| 2470 | issued = __ceph_caps_issued(ci, &implemented); | ||
| 2471 | issued |= implemented | __ceph_caps_dirty(ci); | ||
| 2472 | |||
| 2473 | cap->cap_gen = session->s_cap_gen; | 2449 | cap->cap_gen = session->s_cap_gen; |
| 2474 | cap->seq = seq; | 2450 | cap->seq = seq; |
| 2475 | 2451 | ||
| 2476 | __check_cap_issue(ci, cap, newcaps); | 2452 | __check_cap_issue(ci, cap, newcaps); |
| 2477 | 2453 | ||
| 2478 | if ((issued & CEPH_CAP_AUTH_EXCL) == 0) { | 2454 | if ((newcaps & CEPH_CAP_AUTH_SHARED) && |
| 2455 | (issued & CEPH_CAP_AUTH_EXCL) == 0) { | ||
| 2479 | inode->i_mode = le32_to_cpu(grant->mode); | 2456 | inode->i_mode = le32_to_cpu(grant->mode); |
| 2480 | inode->i_uid = make_kuid(&init_user_ns, le32_to_cpu(grant->uid)); | 2457 | inode->i_uid = make_kuid(&init_user_ns, le32_to_cpu(grant->uid)); |
| 2481 | inode->i_gid = make_kgid(&init_user_ns, le32_to_cpu(grant->gid)); | 2458 | inode->i_gid = make_kgid(&init_user_ns, le32_to_cpu(grant->gid)); |
| @@ -2484,7 +2461,8 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, | |||
| 2484 | from_kgid(&init_user_ns, inode->i_gid)); | 2461 | from_kgid(&init_user_ns, inode->i_gid)); |
| 2485 | } | 2462 | } |
| 2486 | 2463 | ||
| 2487 | if ((issued & CEPH_CAP_LINK_EXCL) == 0) { | 2464 | if ((newcaps & CEPH_CAP_AUTH_SHARED) && |
| 2465 | (issued & CEPH_CAP_LINK_EXCL) == 0) { | ||
| 2488 | set_nlink(inode, le32_to_cpu(grant->nlink)); | 2466 | set_nlink(inode, le32_to_cpu(grant->nlink)); |
| 2489 | if (inode->i_nlink == 0 && | 2467 | if (inode->i_nlink == 0 && |
| 2490 | (newcaps & (CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL))) | 2468 | (newcaps & (CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL))) |
| @@ -2511,30 +2489,35 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, | |||
| 2511 | if ((issued & CEPH_CAP_FILE_CACHE) && ci->i_rdcache_gen > 1) | 2489 | if ((issued & CEPH_CAP_FILE_CACHE) && ci->i_rdcache_gen > 1) |
| 2512 | queue_revalidate = 1; | 2490 | queue_revalidate = 1; |
| 2513 | 2491 | ||
| 2514 | /* size/ctime/mtime/atime? */ | 2492 | if (newcaps & CEPH_CAP_ANY_RD) { |
| 2515 | ceph_fill_file_size(inode, issued, | 2493 | /* ctime/mtime/atime? */ |
| 2516 | le32_to_cpu(grant->truncate_seq), | 2494 | ceph_decode_timespec(&mtime, &grant->mtime); |
| 2517 | le64_to_cpu(grant->truncate_size), size); | 2495 | ceph_decode_timespec(&atime, &grant->atime); |
| 2518 | ceph_decode_timespec(&mtime, &grant->mtime); | 2496 | ceph_decode_timespec(&ctime, &grant->ctime); |
| 2519 | ceph_decode_timespec(&atime, &grant->atime); | 2497 | ceph_fill_file_time(inode, issued, |
| 2520 | ceph_decode_timespec(&ctime, &grant->ctime); | 2498 | le32_to_cpu(grant->time_warp_seq), |
| 2521 | ceph_fill_file_time(inode, issued, | 2499 | &ctime, &mtime, &atime); |
| 2522 | le32_to_cpu(grant->time_warp_seq), &ctime, &mtime, | 2500 | } |
| 2523 | &atime); | 2501 | |
| 2524 | 2502 | if (newcaps & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR)) { | |
| 2525 | 2503 | /* file layout may have changed */ | |
| 2526 | /* file layout may have changed */ | 2504 | ci->i_layout = grant->layout; |
| 2527 | ci->i_layout = grant->layout; | 2505 | /* size/truncate_seq? */ |
| 2528 | 2506 | queue_trunc = ceph_fill_file_size(inode, issued, | |
| 2529 | /* max size increase? */ | 2507 | le32_to_cpu(grant->truncate_seq), |
| 2530 | if (ci->i_auth_cap == cap && max_size != ci->i_max_size) { | 2508 | le64_to_cpu(grant->truncate_size), |
| 2531 | dout("max_size %lld -> %llu\n", ci->i_max_size, max_size); | 2509 | size); |
| 2532 | ci->i_max_size = max_size; | 2510 | /* max size increase? */ |
| 2533 | if (max_size >= ci->i_wanted_max_size) { | 2511 | if (ci->i_auth_cap == cap && max_size != ci->i_max_size) { |
| 2534 | ci->i_wanted_max_size = 0; /* reset */ | 2512 | dout("max_size %lld -> %llu\n", |
| 2535 | ci->i_requested_max_size = 0; | 2513 | ci->i_max_size, max_size); |
| 2514 | ci->i_max_size = max_size; | ||
| 2515 | if (max_size >= ci->i_wanted_max_size) { | ||
| 2516 | ci->i_wanted_max_size = 0; /* reset */ | ||
| 2517 | ci->i_requested_max_size = 0; | ||
| 2518 | } | ||
| 2519 | wake = 1; | ||
| 2536 | } | 2520 | } |
| 2537 | wake = 1; | ||
| 2538 | } | 2521 | } |
| 2539 | 2522 | ||
| 2540 | /* check cap bits */ | 2523 | /* check cap bits */ |
| @@ -2595,6 +2578,23 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, | |||
| 2595 | 2578 | ||
| 2596 | spin_unlock(&ci->i_ceph_lock); | 2579 | spin_unlock(&ci->i_ceph_lock); |
| 2597 | 2580 | ||
| 2581 | if (le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT) { | ||
| 2582 | down_write(&mdsc->snap_rwsem); | ||
| 2583 | ceph_update_snap_trace(mdsc, snaptrace, | ||
| 2584 | snaptrace + snaptrace_len, false); | ||
| 2585 | downgrade_write(&mdsc->snap_rwsem); | ||
| 2586 | kick_flushing_inode_caps(mdsc, session, inode); | ||
| 2587 | up_read(&mdsc->snap_rwsem); | ||
| 2588 | if (newcaps & ~issued) | ||
| 2589 | wake = 1; | ||
| 2590 | } | ||
| 2591 | |||
| 2592 | if (queue_trunc) { | ||
| 2593 | ceph_queue_vmtruncate(inode); | ||
| 2594 | ceph_queue_revalidate(inode); | ||
| 2595 | } else if (queue_revalidate) | ||
| 2596 | ceph_queue_revalidate(inode); | ||
| 2597 | |||
| 2598 | if (writeback) | 2598 | if (writeback) |
| 2599 | /* | 2599 | /* |
| 2600 | * queue inode for writeback: we can't actually call | 2600 | * queue inode for writeback: we can't actually call |
| @@ -2606,8 +2606,6 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, | |||
| 2606 | ceph_queue_invalidate(inode); | 2606 | ceph_queue_invalidate(inode); |
| 2607 | if (deleted_inode) | 2607 | if (deleted_inode) |
| 2608 | invalidate_aliases(inode); | 2608 | invalidate_aliases(inode); |
| 2609 | if (queue_revalidate) | ||
| 2610 | ceph_queue_revalidate(inode); | ||
| 2611 | if (wake) | 2609 | if (wake) |
| 2612 | wake_up_all(&ci->i_cap_wq); | 2610 | wake_up_all(&ci->i_cap_wq); |
| 2613 | 2611 | ||
| @@ -2784,7 +2782,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex, | |||
| 2784 | { | 2782 | { |
| 2785 | struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; | 2783 | struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; |
| 2786 | struct ceph_mds_session *tsession = NULL; | 2784 | struct ceph_mds_session *tsession = NULL; |
| 2787 | struct ceph_cap *cap, *tcap; | 2785 | struct ceph_cap *cap, *tcap, *new_cap = NULL; |
| 2788 | struct ceph_inode_info *ci = ceph_inode(inode); | 2786 | struct ceph_inode_info *ci = ceph_inode(inode); |
| 2789 | u64 t_cap_id; | 2787 | u64 t_cap_id; |
| 2790 | unsigned mseq = le32_to_cpu(ex->migrate_seq); | 2788 | unsigned mseq = le32_to_cpu(ex->migrate_seq); |
| @@ -2807,7 +2805,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex, | |||
| 2807 | retry: | 2805 | retry: |
| 2808 | spin_lock(&ci->i_ceph_lock); | 2806 | spin_lock(&ci->i_ceph_lock); |
| 2809 | cap = __get_cap_for_mds(ci, mds); | 2807 | cap = __get_cap_for_mds(ci, mds); |
| 2810 | if (!cap) | 2808 | if (!cap || cap->cap_id != le64_to_cpu(ex->cap_id)) |
| 2811 | goto out_unlock; | 2809 | goto out_unlock; |
| 2812 | 2810 | ||
| 2813 | if (target < 0) { | 2811 | if (target < 0) { |
| @@ -2846,15 +2844,14 @@ retry: | |||
| 2846 | } | 2844 | } |
| 2847 | __ceph_remove_cap(cap, false); | 2845 | __ceph_remove_cap(cap, false); |
| 2848 | goto out_unlock; | 2846 | goto out_unlock; |
| 2849 | } | 2847 | } else if (tsession) { |
| 2850 | |||
| 2851 | if (tsession) { | ||
| 2852 | int flag = (cap == ci->i_auth_cap) ? CEPH_CAP_FLAG_AUTH : 0; | ||
| 2853 | spin_unlock(&ci->i_ceph_lock); | ||
| 2854 | /* add placeholder for the export tagert */ | 2848 | /* add placeholder for the export tagert */ |
| 2849 | int flag = (cap == ci->i_auth_cap) ? CEPH_CAP_FLAG_AUTH : 0; | ||
| 2855 | ceph_add_cap(inode, tsession, t_cap_id, -1, issued, 0, | 2850 | ceph_add_cap(inode, tsession, t_cap_id, -1, issued, 0, |
| 2856 | t_seq - 1, t_mseq, (u64)-1, flag, NULL); | 2851 | t_seq - 1, t_mseq, (u64)-1, flag, &new_cap); |
| 2857 | goto retry; | 2852 | |
| 2853 | __ceph_remove_cap(cap, false); | ||
| 2854 | goto out_unlock; | ||
| 2858 | } | 2855 | } |
| 2859 | 2856 | ||
| 2860 | spin_unlock(&ci->i_ceph_lock); | 2857 | spin_unlock(&ci->i_ceph_lock); |
| @@ -2873,6 +2870,7 @@ retry: | |||
| 2873 | SINGLE_DEPTH_NESTING); | 2870 | SINGLE_DEPTH_NESTING); |
| 2874 | } | 2871 | } |
| 2875 | ceph_add_cap_releases(mdsc, tsession); | 2872 | ceph_add_cap_releases(mdsc, tsession); |
| 2873 | new_cap = ceph_get_cap(mdsc, NULL); | ||
| 2876 | } else { | 2874 | } else { |
| 2877 | WARN_ON(1); | 2875 | WARN_ON(1); |
| 2878 | tsession = NULL; | 2876 | tsession = NULL; |
| @@ -2887,24 +2885,27 @@ out_unlock: | |||
| 2887 | mutex_unlock(&tsession->s_mutex); | 2885 | mutex_unlock(&tsession->s_mutex); |
| 2888 | ceph_put_mds_session(tsession); | 2886 | ceph_put_mds_session(tsession); |
| 2889 | } | 2887 | } |
| 2888 | if (new_cap) | ||
| 2889 | ceph_put_cap(mdsc, new_cap); | ||
| 2890 | } | 2890 | } |
| 2891 | 2891 | ||
| 2892 | /* | 2892 | /* |
| 2893 | * Handle cap IMPORT. If there are temp bits from an older EXPORT, | 2893 | * Handle cap IMPORT. |
| 2894 | * clean them up. | ||
| 2895 | * | 2894 | * |
| 2896 | * caller holds s_mutex. | 2895 | * caller holds s_mutex. acquires i_ceph_lock |
| 2897 | */ | 2896 | */ |
| 2898 | static void handle_cap_import(struct ceph_mds_client *mdsc, | 2897 | static void handle_cap_import(struct ceph_mds_client *mdsc, |
| 2899 | struct inode *inode, struct ceph_mds_caps *im, | 2898 | struct inode *inode, struct ceph_mds_caps *im, |
| 2900 | struct ceph_mds_cap_peer *ph, | 2899 | struct ceph_mds_cap_peer *ph, |
| 2901 | struct ceph_mds_session *session, | 2900 | struct ceph_mds_session *session, |
| 2902 | void *snaptrace, int snaptrace_len) | 2901 | struct ceph_cap **target_cap, int *old_issued) |
| 2902 | __acquires(ci->i_ceph_lock) | ||
| 2903 | { | 2903 | { |
| 2904 | struct ceph_inode_info *ci = ceph_inode(inode); | 2904 | struct ceph_inode_info *ci = ceph_inode(inode); |
| 2905 | struct ceph_cap *cap; | 2905 | struct ceph_cap *cap, *ocap, *new_cap = NULL; |
| 2906 | int mds = session->s_mds; | 2906 | int mds = session->s_mds; |
| 2907 | unsigned issued = le32_to_cpu(im->caps); | 2907 | int issued; |
| 2908 | unsigned caps = le32_to_cpu(im->caps); | ||
| 2908 | unsigned wanted = le32_to_cpu(im->wanted); | 2909 | unsigned wanted = le32_to_cpu(im->wanted); |
| 2909 | unsigned seq = le32_to_cpu(im->seq); | 2910 | unsigned seq = le32_to_cpu(im->seq); |
| 2910 | unsigned mseq = le32_to_cpu(im->migrate_seq); | 2911 | unsigned mseq = le32_to_cpu(im->migrate_seq); |
| @@ -2924,40 +2925,52 @@ static void handle_cap_import(struct ceph_mds_client *mdsc, | |||
| 2924 | dout("handle_cap_import inode %p ci %p mds%d mseq %d peer %d\n", | 2925 | dout("handle_cap_import inode %p ci %p mds%d mseq %d peer %d\n", |
| 2925 | inode, ci, mds, mseq, peer); | 2926 | inode, ci, mds, mseq, peer); |
| 2926 | 2927 | ||
| 2928 | retry: | ||
| 2927 | spin_lock(&ci->i_ceph_lock); | 2929 | spin_lock(&ci->i_ceph_lock); |
| 2928 | cap = peer >= 0 ? __get_cap_for_mds(ci, peer) : NULL; | 2930 | cap = __get_cap_for_mds(ci, mds); |
| 2929 | if (cap && cap->cap_id == p_cap_id) { | 2931 | if (!cap) { |
| 2932 | if (!new_cap) { | ||
| 2933 | spin_unlock(&ci->i_ceph_lock); | ||
| 2934 | new_cap = ceph_get_cap(mdsc, NULL); | ||
| 2935 | goto retry; | ||
| 2936 | } | ||
| 2937 | cap = new_cap; | ||
| 2938 | } else { | ||
| 2939 | if (new_cap) { | ||
| 2940 | ceph_put_cap(mdsc, new_cap); | ||
| 2941 | new_cap = NULL; | ||
| 2942 | } | ||
| 2943 | } | ||
| 2944 | |||
| 2945 | __ceph_caps_issued(ci, &issued); | ||
| 2946 | issued |= __ceph_caps_dirty(ci); | ||
| 2947 | |||
| 2948 | ceph_add_cap(inode, session, cap_id, -1, caps, wanted, seq, mseq, | ||
| 2949 | realmino, CEPH_CAP_FLAG_AUTH, &new_cap); | ||
| 2950 | |||
| 2951 | ocap = peer >= 0 ? __get_cap_for_mds(ci, peer) : NULL; | ||
| 2952 | if (ocap && ocap->cap_id == p_cap_id) { | ||
| 2930 | dout(" remove export cap %p mds%d flags %d\n", | 2953 | dout(" remove export cap %p mds%d flags %d\n", |
| 2931 | cap, peer, ph->flags); | 2954 | ocap, peer, ph->flags); |
| 2932 | if ((ph->flags & CEPH_CAP_FLAG_AUTH) && | 2955 | if ((ph->flags & CEPH_CAP_FLAG_AUTH) && |
| 2933 | (cap->seq != le32_to_cpu(ph->seq) || | 2956 | (ocap->seq != le32_to_cpu(ph->seq) || |
| 2934 | cap->mseq != le32_to_cpu(ph->mseq))) { | 2957 | ocap->mseq != le32_to_cpu(ph->mseq))) { |
| 2935 | pr_err("handle_cap_import: mismatched seq/mseq: " | 2958 | pr_err("handle_cap_import: mismatched seq/mseq: " |
| 2936 | "ino (%llx.%llx) mds%d seq %d mseq %d " | 2959 | "ino (%llx.%llx) mds%d seq %d mseq %d " |
| 2937 | "importer mds%d has peer seq %d mseq %d\n", | 2960 | "importer mds%d has peer seq %d mseq %d\n", |
| 2938 | ceph_vinop(inode), peer, cap->seq, | 2961 | ceph_vinop(inode), peer, ocap->seq, |
| 2939 | cap->mseq, mds, le32_to_cpu(ph->seq), | 2962 | ocap->mseq, mds, le32_to_cpu(ph->seq), |
| 2940 | le32_to_cpu(ph->mseq)); | 2963 | le32_to_cpu(ph->mseq)); |
| 2941 | } | 2964 | } |
| 2942 | ci->i_cap_exporting_issued = cap->issued; | 2965 | __ceph_remove_cap(ocap, (ph->flags & CEPH_CAP_FLAG_RELEASE)); |
| 2943 | __ceph_remove_cap(cap, (ph->flags & CEPH_CAP_FLAG_RELEASE)); | ||
| 2944 | } | 2966 | } |
| 2945 | 2967 | ||
| 2946 | /* make sure we re-request max_size, if necessary */ | 2968 | /* make sure we re-request max_size, if necessary */ |
| 2947 | ci->i_wanted_max_size = 0; | 2969 | ci->i_wanted_max_size = 0; |
| 2948 | ci->i_requested_max_size = 0; | 2970 | ci->i_requested_max_size = 0; |
| 2949 | spin_unlock(&ci->i_ceph_lock); | ||
| 2950 | |||
| 2951 | down_write(&mdsc->snap_rwsem); | ||
| 2952 | ceph_update_snap_trace(mdsc, snaptrace, snaptrace+snaptrace_len, | ||
| 2953 | false); | ||
| 2954 | downgrade_write(&mdsc->snap_rwsem); | ||
| 2955 | ceph_add_cap(inode, session, cap_id, -1, | ||
| 2956 | issued, wanted, seq, mseq, realmino, CEPH_CAP_FLAG_AUTH, | ||
| 2957 | NULL /* no caps context */); | ||
| 2958 | kick_flushing_inode_caps(mdsc, session, inode); | ||
| 2959 | up_read(&mdsc->snap_rwsem); | ||
| 2960 | 2971 | ||
| 2972 | *old_issued = issued; | ||
| 2973 | *target_cap = cap; | ||
| 2961 | } | 2974 | } |
| 2962 | 2975 | ||
| 2963 | /* | 2976 | /* |
| @@ -2977,7 +2990,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
| 2977 | struct ceph_mds_caps *h; | 2990 | struct ceph_mds_caps *h; |
| 2978 | struct ceph_mds_cap_peer *peer = NULL; | 2991 | struct ceph_mds_cap_peer *peer = NULL; |
| 2979 | int mds = session->s_mds; | 2992 | int mds = session->s_mds; |
| 2980 | int op; | 2993 | int op, issued; |
| 2981 | u32 seq, mseq; | 2994 | u32 seq, mseq; |
| 2982 | struct ceph_vino vino; | 2995 | struct ceph_vino vino; |
| 2983 | u64 cap_id; | 2996 | u64 cap_id; |
| @@ -3069,7 +3082,10 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
| 3069 | 3082 | ||
| 3070 | case CEPH_CAP_OP_IMPORT: | 3083 | case CEPH_CAP_OP_IMPORT: |
| 3071 | handle_cap_import(mdsc, inode, h, peer, session, | 3084 | handle_cap_import(mdsc, inode, h, peer, session, |
| 3072 | snaptrace, snaptrace_len); | 3085 | &cap, &issued); |
| 3086 | handle_cap_grant(mdsc, inode, h, snaptrace, snaptrace_len, | ||
| 3087 | msg->middle, session, cap, issued); | ||
| 3088 | goto done_unlocked; | ||
| 3073 | } | 3089 | } |
| 3074 | 3090 | ||
| 3075 | /* the rest require a cap */ | 3091 | /* the rest require a cap */ |
| @@ -3086,8 +3102,10 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
| 3086 | switch (op) { | 3102 | switch (op) { |
| 3087 | case CEPH_CAP_OP_REVOKE: | 3103 | case CEPH_CAP_OP_REVOKE: |
| 3088 | case CEPH_CAP_OP_GRANT: | 3104 | case CEPH_CAP_OP_GRANT: |
| 3089 | case CEPH_CAP_OP_IMPORT: | 3105 | __ceph_caps_issued(ci, &issued); |
| 3090 | handle_cap_grant(inode, h, session, cap, msg->middle); | 3106 | issued |= __ceph_caps_dirty(ci); |
| 3107 | handle_cap_grant(mdsc, inode, h, NULL, 0, msg->middle, | ||
| 3108 | session, cap, issued); | ||
| 3091 | goto done_unlocked; | 3109 | goto done_unlocked; |
| 3092 | 3110 | ||
| 3093 | case CEPH_CAP_OP_FLUSH_ACK: | 3111 | case CEPH_CAP_OP_FLUSH_ACK: |
diff --git a/fs/ceph/export.c b/fs/ceph/export.c index 00d6af6a32ec..8d7d782f4382 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c | |||
| @@ -169,7 +169,7 @@ static struct dentry *__get_parent(struct super_block *sb, | |||
| 169 | return dentry; | 169 | return dentry; |
| 170 | } | 170 | } |
| 171 | 171 | ||
| 172 | struct dentry *ceph_get_parent(struct dentry *child) | 172 | static struct dentry *ceph_get_parent(struct dentry *child) |
| 173 | { | 173 | { |
| 174 | /* don't re-export snaps */ | 174 | /* don't re-export snaps */ |
| 175 | if (ceph_snap(child->d_inode) != CEPH_NOSNAP) | 175 | if (ceph_snap(child->d_inode) != CEPH_NOSNAP) |
diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 88a6df4cbe6d..302085100c28 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c | |||
| @@ -418,7 +418,7 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i, | |||
| 418 | struct page **pages; | 418 | struct page **pages; |
| 419 | u64 off = iocb->ki_pos; | 419 | u64 off = iocb->ki_pos; |
| 420 | int num_pages, ret; | 420 | int num_pages, ret; |
| 421 | size_t len = i->count; | 421 | size_t len = iov_iter_count(i); |
| 422 | 422 | ||
| 423 | dout("sync_read on file %p %llu~%u %s\n", file, off, | 423 | dout("sync_read on file %p %llu~%u %s\n", file, off, |
| 424 | (unsigned)len, | 424 | (unsigned)len, |
| @@ -436,25 +436,26 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i, | |||
| 436 | 436 | ||
| 437 | if (file->f_flags & O_DIRECT) { | 437 | if (file->f_flags & O_DIRECT) { |
| 438 | while (iov_iter_count(i)) { | 438 | while (iov_iter_count(i)) { |
| 439 | void __user *data = i->iov[0].iov_base + i->iov_offset; | 439 | size_t start; |
| 440 | size_t len = i->iov[0].iov_len - i->iov_offset; | 440 | ssize_t n; |
| 441 | 441 | ||
| 442 | num_pages = calc_pages_for((unsigned long)data, len); | 442 | n = iov_iter_get_pages_alloc(i, &pages, INT_MAX, &start); |
| 443 | pages = ceph_get_direct_page_vector(data, | 443 | if (n < 0) |
| 444 | num_pages, true); | 444 | return n; |
| 445 | if (IS_ERR(pages)) | ||
| 446 | return PTR_ERR(pages); | ||
| 447 | 445 | ||
| 448 | ret = striped_read(inode, off, len, | 446 | num_pages = (n + start + PAGE_SIZE - 1) / PAGE_SIZE; |
| 447 | |||
| 448 | ret = striped_read(inode, off, n, | ||
| 449 | pages, num_pages, checkeof, | 449 | pages, num_pages, checkeof, |
| 450 | 1, (unsigned long)data & ~PAGE_MASK); | 450 | 1, start); |
| 451 | |||
| 451 | ceph_put_page_vector(pages, num_pages, true); | 452 | ceph_put_page_vector(pages, num_pages, true); |
| 452 | 453 | ||
| 453 | if (ret <= 0) | 454 | if (ret <= 0) |
| 454 | break; | 455 | break; |
| 455 | off += ret; | 456 | off += ret; |
| 456 | iov_iter_advance(i, ret); | 457 | iov_iter_advance(i, ret); |
| 457 | if (ret < len) | 458 | if (ret < n) |
| 458 | break; | 459 | break; |
| 459 | } | 460 | } |
| 460 | } else { | 461 | } else { |
| @@ -466,25 +467,14 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i, | |||
| 466 | num_pages, checkeof, 0, 0); | 467 | num_pages, checkeof, 0, 0); |
| 467 | if (ret > 0) { | 468 | if (ret > 0) { |
| 468 | int l, k = 0; | 469 | int l, k = 0; |
| 469 | size_t left = len = ret; | 470 | size_t left = ret; |
| 470 | 471 | ||
| 471 | while (left) { | 472 | while (left) { |
| 472 | void __user *data = i->iov[0].iov_base | 473 | int copy = min_t(size_t, PAGE_SIZE, left); |
| 473 | + i->iov_offset; | 474 | l = copy_page_to_iter(pages[k++], 0, copy, i); |
| 474 | l = min(i->iov[0].iov_len - i->iov_offset, | 475 | off += l; |
| 475 | left); | 476 | left -= l; |
| 476 | 477 | if (l < copy) | |
| 477 | ret = ceph_copy_page_vector_to_user(&pages[k], | ||
| 478 | data, off, | ||
| 479 | l); | ||
| 480 | if (ret > 0) { | ||
| 481 | iov_iter_advance(i, ret); | ||
| 482 | left -= ret; | ||
| 483 | off += ret; | ||
| 484 | k = calc_pages_for(iocb->ki_pos, | ||
| 485 | len - left + 1) - 1; | ||
| 486 | BUG_ON(k >= num_pages && left); | ||
| 487 | } else | ||
| 488 | break; | 478 | break; |
| 489 | } | 479 | } |
| 490 | } | 480 | } |
| @@ -541,8 +531,7 @@ static void ceph_sync_write_unsafe(struct ceph_osd_request *req, bool unsafe) | |||
| 541 | * objects, rollback on failure, etc.) | 531 | * objects, rollback on failure, etc.) |
| 542 | */ | 532 | */ |
| 543 | static ssize_t | 533 | static ssize_t |
| 544 | ceph_sync_direct_write(struct kiocb *iocb, const struct iovec *iov, | 534 | ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from) |
| 545 | unsigned long nr_segs, size_t count) | ||
| 546 | { | 535 | { |
| 547 | struct file *file = iocb->ki_filp; | 536 | struct file *file = iocb->ki_filp; |
| 548 | struct inode *inode = file_inode(file); | 537 | struct inode *inode = file_inode(file); |
| @@ -556,11 +545,10 @@ ceph_sync_direct_write(struct kiocb *iocb, const struct iovec *iov, | |||
| 556 | int written = 0; | 545 | int written = 0; |
| 557 | int flags; | 546 | int flags; |
| 558 | int check_caps = 0; | 547 | int check_caps = 0; |
| 559 | int page_align; | ||
| 560 | int ret; | 548 | int ret; |
| 561 | struct timespec mtime = CURRENT_TIME; | 549 | struct timespec mtime = CURRENT_TIME; |
| 562 | loff_t pos = iocb->ki_pos; | 550 | loff_t pos = iocb->ki_pos; |
| 563 | struct iov_iter i; | 551 | size_t count = iov_iter_count(from); |
| 564 | 552 | ||
| 565 | if (ceph_snap(file_inode(file)) != CEPH_NOSNAP) | 553 | if (ceph_snap(file_inode(file)) != CEPH_NOSNAP) |
| 566 | return -EROFS; | 554 | return -EROFS; |
| @@ -582,13 +570,10 @@ ceph_sync_direct_write(struct kiocb *iocb, const struct iovec *iov, | |||
| 582 | CEPH_OSD_FLAG_ONDISK | | 570 | CEPH_OSD_FLAG_ONDISK | |
| 583 | CEPH_OSD_FLAG_WRITE; | 571 | CEPH_OSD_FLAG_WRITE; |
| 584 | 572 | ||
| 585 | iov_iter_init(&i, iov, nr_segs, count, 0); | 573 | while (iov_iter_count(from) > 0) { |
| 586 | 574 | u64 len = iov_iter_single_seg_count(from); | |
| 587 | while (iov_iter_count(&i) > 0) { | 575 | size_t start; |
| 588 | void __user *data = i.iov->iov_base + i.iov_offset; | 576 | ssize_t n; |
| 589 | u64 len = i.iov->iov_len - i.iov_offset; | ||
| 590 | |||
| 591 | page_align = (unsigned long)data & ~PAGE_MASK; | ||
| 592 | 577 | ||
| 593 | snapc = ci->i_snap_realm->cached_context; | 578 | snapc = ci->i_snap_realm->cached_context; |
| 594 | vino = ceph_vino(inode); | 579 | vino = ceph_vino(inode); |
| @@ -604,20 +589,21 @@ ceph_sync_direct_write(struct kiocb *iocb, const struct iovec *iov, | |||
| 604 | break; | 589 | break; |
| 605 | } | 590 | } |
| 606 | 591 | ||
| 607 | num_pages = calc_pages_for(page_align, len); | 592 | n = iov_iter_get_pages_alloc(from, &pages, len, &start); |
| 608 | pages = ceph_get_direct_page_vector(data, num_pages, false); | 593 | if (unlikely(n < 0)) { |
| 609 | if (IS_ERR(pages)) { | 594 | ret = n; |
| 610 | ret = PTR_ERR(pages); | 595 | ceph_osdc_put_request(req); |
| 611 | goto out; | 596 | break; |
| 612 | } | 597 | } |
| 613 | 598 | ||
| 599 | num_pages = (n + start + PAGE_SIZE - 1) / PAGE_SIZE; | ||
| 614 | /* | 600 | /* |
| 615 | * throw out any page cache pages in this range. this | 601 | * throw out any page cache pages in this range. this |
| 616 | * may block. | 602 | * may block. |
| 617 | */ | 603 | */ |
| 618 | truncate_inode_pages_range(inode->i_mapping, pos, | 604 | truncate_inode_pages_range(inode->i_mapping, pos, |
| 619 | (pos+len) | (PAGE_CACHE_SIZE-1)); | 605 | (pos+n) | (PAGE_CACHE_SIZE-1)); |
| 620 | osd_req_op_extent_osd_data_pages(req, 0, pages, len, page_align, | 606 | osd_req_op_extent_osd_data_pages(req, 0, pages, n, start, |
| 621 | false, false); | 607 | false, false); |
| 622 | 608 | ||
| 623 | /* BUG_ON(vino.snap != CEPH_NOSNAP); */ | 609 | /* BUG_ON(vino.snap != CEPH_NOSNAP); */ |
| @@ -629,22 +615,20 @@ ceph_sync_direct_write(struct kiocb *iocb, const struct iovec *iov, | |||
| 629 | 615 | ||
| 630 | ceph_put_page_vector(pages, num_pages, false); | 616 | ceph_put_page_vector(pages, num_pages, false); |
| 631 | 617 | ||
| 632 | out: | ||
| 633 | ceph_osdc_put_request(req); | 618 | ceph_osdc_put_request(req); |
| 634 | if (ret == 0) { | 619 | if (ret) |
| 635 | pos += len; | ||
| 636 | written += len; | ||
| 637 | iov_iter_advance(&i, (size_t)len); | ||
| 638 | |||
| 639 | if (pos > i_size_read(inode)) { | ||
| 640 | check_caps = ceph_inode_set_size(inode, pos); | ||
| 641 | if (check_caps) | ||
| 642 | ceph_check_caps(ceph_inode(inode), | ||
| 643 | CHECK_CAPS_AUTHONLY, | ||
| 644 | NULL); | ||
| 645 | } | ||
| 646 | } else | ||
| 647 | break; | 620 | break; |
| 621 | pos += n; | ||
| 622 | written += n; | ||
| 623 | iov_iter_advance(from, n); | ||
| 624 | |||
| 625 | if (pos > i_size_read(inode)) { | ||
| 626 | check_caps = ceph_inode_set_size(inode, pos); | ||
| 627 | if (check_caps) | ||
| 628 | ceph_check_caps(ceph_inode(inode), | ||
| 629 | CHECK_CAPS_AUTHONLY, | ||
| 630 | NULL); | ||
| 631 | } | ||
| 648 | } | 632 | } |
| 649 | 633 | ||
| 650 | if (ret != -EOLDSNAPC && written > 0) { | 634 | if (ret != -EOLDSNAPC && written > 0) { |
| @@ -662,8 +646,7 @@ out: | |||
| 662 | * correct atomic write, we should e.g. take write locks on all | 646 | * correct atomic write, we should e.g. take write locks on all |
| 663 | * objects, rollback on failure, etc.) | 647 | * objects, rollback on failure, etc.) |
| 664 | */ | 648 | */ |
| 665 | static ssize_t ceph_sync_write(struct kiocb *iocb, const struct iovec *iov, | 649 | static ssize_t ceph_sync_write(struct kiocb *iocb, struct iov_iter *from) |
| 666 | unsigned long nr_segs, size_t count) | ||
| 667 | { | 650 | { |
| 668 | struct file *file = iocb->ki_filp; | 651 | struct file *file = iocb->ki_filp; |
| 669 | struct inode *inode = file_inode(file); | 652 | struct inode *inode = file_inode(file); |
| @@ -681,7 +664,7 @@ static ssize_t ceph_sync_write(struct kiocb *iocb, const struct iovec *iov, | |||
| 681 | int ret; | 664 | int ret; |
| 682 | struct timespec mtime = CURRENT_TIME; | 665 | struct timespec mtime = CURRENT_TIME; |
| 683 | loff_t pos = iocb->ki_pos; | 666 | loff_t pos = iocb->ki_pos; |
| 684 | struct iov_iter i; | 667 | size_t count = iov_iter_count(from); |
| 685 | 668 | ||
| 686 | if (ceph_snap(file_inode(file)) != CEPH_NOSNAP) | 669 | if (ceph_snap(file_inode(file)) != CEPH_NOSNAP) |
| 687 | return -EROFS; | 670 | return -EROFS; |
| @@ -703,9 +686,7 @@ static ssize_t ceph_sync_write(struct kiocb *iocb, const struct iovec *iov, | |||
| 703 | CEPH_OSD_FLAG_WRITE | | 686 | CEPH_OSD_FLAG_WRITE | |
| 704 | CEPH_OSD_FLAG_ACK; | 687 | CEPH_OSD_FLAG_ACK; |
| 705 | 688 | ||
| 706 | iov_iter_init(&i, iov, nr_segs, count, 0); | 689 | while ((len = iov_iter_count(from)) > 0) { |
| 707 | |||
| 708 | while ((len = iov_iter_count(&i)) > 0) { | ||
| 709 | size_t left; | 690 | size_t left; |
| 710 | int n; | 691 | int n; |
| 711 | 692 | ||
| @@ -737,13 +718,12 @@ static ssize_t ceph_sync_write(struct kiocb *iocb, const struct iovec *iov, | |||
| 737 | left = len; | 718 | left = len; |
| 738 | for (n = 0; n < num_pages; n++) { | 719 | for (n = 0; n < num_pages; n++) { |
| 739 | size_t plen = min_t(size_t, left, PAGE_SIZE); | 720 | size_t plen = min_t(size_t, left, PAGE_SIZE); |
| 740 | ret = iov_iter_copy_from_user(pages[n], &i, 0, plen); | 721 | ret = copy_page_from_iter(pages[n], 0, plen, from); |
| 741 | if (ret != plen) { | 722 | if (ret != plen) { |
| 742 | ret = -EFAULT; | 723 | ret = -EFAULT; |
| 743 | break; | 724 | break; |
| 744 | } | 725 | } |
| 745 | left -= ret; | 726 | left -= ret; |
| 746 | iov_iter_advance(&i, ret); | ||
| 747 | } | 727 | } |
| 748 | 728 | ||
| 749 | if (ret < 0) { | 729 | if (ret < 0) { |
| @@ -796,8 +776,7 @@ out: | |||
| 796 | * | 776 | * |
| 797 | * Hmm, the sync read case isn't actually async... should it be? | 777 | * Hmm, the sync read case isn't actually async... should it be? |
| 798 | */ | 778 | */ |
| 799 | static ssize_t ceph_aio_read(struct kiocb *iocb, const struct iovec *iov, | 779 | static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to) |
| 800 | unsigned long nr_segs, loff_t pos) | ||
| 801 | { | 780 | { |
| 802 | struct file *filp = iocb->ki_filp; | 781 | struct file *filp = iocb->ki_filp; |
| 803 | struct ceph_file_info *fi = filp->private_data; | 782 | struct ceph_file_info *fi = filp->private_data; |
| @@ -823,40 +802,20 @@ again: | |||
| 823 | if ((got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0 || | 802 | if ((got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0 || |
| 824 | (iocb->ki_filp->f_flags & O_DIRECT) || | 803 | (iocb->ki_filp->f_flags & O_DIRECT) || |
| 825 | (fi->flags & CEPH_F_SYNC)) { | 804 | (fi->flags & CEPH_F_SYNC)) { |
| 826 | struct iov_iter i; | ||
| 827 | 805 | ||
| 828 | dout("aio_sync_read %p %llx.%llx %llu~%u got cap refs on %s\n", | 806 | dout("aio_sync_read %p %llx.%llx %llu~%u got cap refs on %s\n", |
| 829 | inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len, | 807 | inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len, |
| 830 | ceph_cap_string(got)); | 808 | ceph_cap_string(got)); |
| 831 | 809 | ||
| 832 | if (!read) { | ||
| 833 | ret = generic_segment_checks(iov, &nr_segs, | ||
| 834 | &len, VERIFY_WRITE); | ||
| 835 | if (ret) | ||
| 836 | goto out; | ||
| 837 | } | ||
| 838 | |||
| 839 | iov_iter_init(&i, iov, nr_segs, len, read); | ||
| 840 | |||
| 841 | /* hmm, this isn't really async... */ | 810 | /* hmm, this isn't really async... */ |
| 842 | ret = ceph_sync_read(iocb, &i, &checkeof); | 811 | ret = ceph_sync_read(iocb, to, &checkeof); |
| 843 | } else { | 812 | } else { |
| 844 | /* | ||
| 845 | * We can't modify the content of iov, | ||
| 846 | * so we only read from beginning. | ||
| 847 | */ | ||
| 848 | if (read) { | ||
| 849 | iocb->ki_pos = pos; | ||
| 850 | len = iocb->ki_nbytes; | ||
| 851 | read = 0; | ||
| 852 | } | ||
| 853 | dout("aio_read %p %llx.%llx %llu~%u got cap refs on %s\n", | 813 | dout("aio_read %p %llx.%llx %llu~%u got cap refs on %s\n", |
| 854 | inode, ceph_vinop(inode), pos, (unsigned)len, | 814 | inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len, |
| 855 | ceph_cap_string(got)); | 815 | ceph_cap_string(got)); |
| 856 | 816 | ||
| 857 | ret = generic_file_aio_read(iocb, iov, nr_segs, pos); | 817 | ret = generic_file_read_iter(iocb, to); |
| 858 | } | 818 | } |
| 859 | out: | ||
| 860 | dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n", | 819 | dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n", |
| 861 | inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret); | 820 | inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret); |
| 862 | ceph_put_cap_refs(ci, got); | 821 | ceph_put_cap_refs(ci, got); |
| @@ -872,6 +831,7 @@ out: | |||
| 872 | ", reading more\n", iocb->ki_pos, | 831 | ", reading more\n", iocb->ki_pos, |
| 873 | inode->i_size); | 832 | inode->i_size); |
| 874 | 833 | ||
| 834 | iov_iter_advance(to, ret); | ||
| 875 | read += ret; | 835 | read += ret; |
| 876 | len -= ret; | 836 | len -= ret; |
| 877 | checkeof = 0; | 837 | checkeof = 0; |
| @@ -895,8 +855,7 @@ out: | |||
| 895 | * | 855 | * |
| 896 | * If we are near ENOSPC, write synchronously. | 856 | * If we are near ENOSPC, write synchronously. |
| 897 | */ | 857 | */ |
| 898 | static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov, | 858 | static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) |
| 899 | unsigned long nr_segs, loff_t pos) | ||
| 900 | { | 859 | { |
| 901 | struct file *file = iocb->ki_filp; | 860 | struct file *file = iocb->ki_filp; |
| 902 | struct ceph_file_info *fi = file->private_data; | 861 | struct ceph_file_info *fi = file->private_data; |
| @@ -904,18 +863,15 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
| 904 | struct ceph_inode_info *ci = ceph_inode(inode); | 863 | struct ceph_inode_info *ci = ceph_inode(inode); |
| 905 | struct ceph_osd_client *osdc = | 864 | struct ceph_osd_client *osdc = |
| 906 | &ceph_sb_to_client(inode->i_sb)->client->osdc; | 865 | &ceph_sb_to_client(inode->i_sb)->client->osdc; |
| 907 | ssize_t count, written = 0; | 866 | ssize_t count = iov_iter_count(from), written = 0; |
| 908 | int err, want, got; | 867 | int err, want, got; |
| 868 | loff_t pos = iocb->ki_pos; | ||
| 909 | 869 | ||
| 910 | if (ceph_snap(inode) != CEPH_NOSNAP) | 870 | if (ceph_snap(inode) != CEPH_NOSNAP) |
| 911 | return -EROFS; | 871 | return -EROFS; |
| 912 | 872 | ||
| 913 | mutex_lock(&inode->i_mutex); | 873 | mutex_lock(&inode->i_mutex); |
| 914 | 874 | ||
| 915 | err = generic_segment_checks(iov, &nr_segs, &count, VERIFY_READ); | ||
| 916 | if (err) | ||
| 917 | goto out; | ||
| 918 | |||
| 919 | /* We can write back this queue in page reclaim */ | 875 | /* We can write back this queue in page reclaim */ |
| 920 | current->backing_dev_info = file->f_mapping->backing_dev_info; | 876 | current->backing_dev_info = file->f_mapping->backing_dev_info; |
| 921 | 877 | ||
| @@ -925,6 +881,7 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
| 925 | 881 | ||
| 926 | if (count == 0) | 882 | if (count == 0) |
| 927 | goto out; | 883 | goto out; |
| 884 | iov_iter_truncate(from, count); | ||
| 928 | 885 | ||
| 929 | err = file_remove_suid(file); | 886 | err = file_remove_suid(file); |
| 930 | if (err) | 887 | if (err) |
| @@ -956,23 +913,26 @@ retry_snap: | |||
| 956 | 913 | ||
| 957 | if ((got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO)) == 0 || | 914 | if ((got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO)) == 0 || |
| 958 | (file->f_flags & O_DIRECT) || (fi->flags & CEPH_F_SYNC)) { | 915 | (file->f_flags & O_DIRECT) || (fi->flags & CEPH_F_SYNC)) { |
| 916 | struct iov_iter data; | ||
| 959 | mutex_unlock(&inode->i_mutex); | 917 | mutex_unlock(&inode->i_mutex); |
| 918 | /* we might need to revert back to that point */ | ||
| 919 | data = *from; | ||
| 960 | if (file->f_flags & O_DIRECT) | 920 | if (file->f_flags & O_DIRECT) |
| 961 | written = ceph_sync_direct_write(iocb, iov, | 921 | written = ceph_sync_direct_write(iocb, &data); |
| 962 | nr_segs, count); | ||
| 963 | else | 922 | else |
| 964 | written = ceph_sync_write(iocb, iov, nr_segs, count); | 923 | written = ceph_sync_write(iocb, &data); |
| 965 | if (written == -EOLDSNAPC) { | 924 | if (written == -EOLDSNAPC) { |
| 966 | dout("aio_write %p %llx.%llx %llu~%u" | 925 | dout("aio_write %p %llx.%llx %llu~%u" |
| 967 | "got EOLDSNAPC, retrying\n", | 926 | "got EOLDSNAPC, retrying\n", |
| 968 | inode, ceph_vinop(inode), | 927 | inode, ceph_vinop(inode), |
| 969 | pos, (unsigned)iov->iov_len); | 928 | pos, (unsigned)count); |
| 970 | mutex_lock(&inode->i_mutex); | 929 | mutex_lock(&inode->i_mutex); |
| 971 | goto retry_snap; | 930 | goto retry_snap; |
| 972 | } | 931 | } |
| 932 | if (written > 0) | ||
| 933 | iov_iter_advance(from, written); | ||
| 973 | } else { | 934 | } else { |
| 974 | loff_t old_size = inode->i_size; | 935 | loff_t old_size = inode->i_size; |
| 975 | struct iov_iter from; | ||
| 976 | /* | 936 | /* |
| 977 | * No need to acquire the i_truncate_mutex. Because | 937 | * No need to acquire the i_truncate_mutex. Because |
| 978 | * the MDS revokes Fwb caps before sending truncate | 938 | * the MDS revokes Fwb caps before sending truncate |
| @@ -980,8 +940,7 @@ retry_snap: | |||
| 980 | * are pending vmtruncate. So write and vmtruncate | 940 | * are pending vmtruncate. So write and vmtruncate |
| 981 | * can not run at the same time | 941 | * can not run at the same time |
| 982 | */ | 942 | */ |
| 983 | iov_iter_init(&from, iov, nr_segs, count, 0); | 943 | written = generic_perform_write(file, from, pos); |
| 984 | written = generic_perform_write(file, &from, pos); | ||
| 985 | if (likely(written >= 0)) | 944 | if (likely(written >= 0)) |
| 986 | iocb->ki_pos = pos + written; | 945 | iocb->ki_pos = pos + written; |
| 987 | if (inode->i_size > old_size) | 946 | if (inode->i_size > old_size) |
| @@ -999,7 +958,7 @@ retry_snap: | |||
| 999 | } | 958 | } |
| 1000 | 959 | ||
| 1001 | dout("aio_write %p %llx.%llx %llu~%u dropping cap refs on %s\n", | 960 | dout("aio_write %p %llx.%llx %llu~%u dropping cap refs on %s\n", |
| 1002 | inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, | 961 | inode, ceph_vinop(inode), pos, (unsigned)count, |
| 1003 | ceph_cap_string(got)); | 962 | ceph_cap_string(got)); |
| 1004 | ceph_put_cap_refs(ci, got); | 963 | ceph_put_cap_refs(ci, got); |
| 1005 | 964 | ||
| @@ -1276,16 +1235,16 @@ const struct file_operations ceph_file_fops = { | |||
| 1276 | .open = ceph_open, | 1235 | .open = ceph_open, |
| 1277 | .release = ceph_release, | 1236 | .release = ceph_release, |
| 1278 | .llseek = ceph_llseek, | 1237 | .llseek = ceph_llseek, |
| 1279 | .read = do_sync_read, | 1238 | .read = new_sync_read, |
| 1280 | .write = do_sync_write, | 1239 | .write = new_sync_write, |
| 1281 | .aio_read = ceph_aio_read, | 1240 | .read_iter = ceph_read_iter, |
| 1282 | .aio_write = ceph_aio_write, | 1241 | .write_iter = ceph_write_iter, |
| 1283 | .mmap = ceph_mmap, | 1242 | .mmap = ceph_mmap, |
| 1284 | .fsync = ceph_fsync, | 1243 | .fsync = ceph_fsync, |
| 1285 | .lock = ceph_lock, | 1244 | .lock = ceph_lock, |
| 1286 | .flock = ceph_flock, | 1245 | .flock = ceph_flock, |
| 1287 | .splice_read = generic_file_splice_read, | 1246 | .splice_read = generic_file_splice_read, |
| 1288 | .splice_write = generic_file_splice_write, | 1247 | .splice_write = iter_file_splice_write, |
| 1289 | .unlocked_ioctl = ceph_ioctl, | 1248 | .unlocked_ioctl = ceph_ioctl, |
| 1290 | .compat_ioctl = ceph_ioctl, | 1249 | .compat_ioctl = ceph_ioctl, |
| 1291 | .fallocate = ceph_fallocate, | 1250 | .fallocate = ceph_fallocate, |
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index e4fff9ff1c27..04c89c266cec 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c | |||
| @@ -10,6 +10,7 @@ | |||
| 10 | #include <linux/writeback.h> | 10 | #include <linux/writeback.h> |
| 11 | #include <linux/vmalloc.h> | 11 | #include <linux/vmalloc.h> |
| 12 | #include <linux/posix_acl.h> | 12 | #include <linux/posix_acl.h> |
| 13 | #include <linux/random.h> | ||
| 13 | 14 | ||
| 14 | #include "super.h" | 15 | #include "super.h" |
| 15 | #include "mds_client.h" | 16 | #include "mds_client.h" |
| @@ -179,9 +180,8 @@ struct ceph_inode_frag *__ceph_find_frag(struct ceph_inode_info *ci, u32 f) | |||
| 179 | * specified, copy the frag delegation info to the caller if | 180 | * specified, copy the frag delegation info to the caller if |
| 180 | * it is present. | 181 | * it is present. |
| 181 | */ | 182 | */ |
| 182 | u32 ceph_choose_frag(struct ceph_inode_info *ci, u32 v, | 183 | static u32 __ceph_choose_frag(struct ceph_inode_info *ci, u32 v, |
| 183 | struct ceph_inode_frag *pfrag, | 184 | struct ceph_inode_frag *pfrag, int *found) |
| 184 | int *found) | ||
| 185 | { | 185 | { |
| 186 | u32 t = ceph_frag_make(0, 0); | 186 | u32 t = ceph_frag_make(0, 0); |
| 187 | struct ceph_inode_frag *frag; | 187 | struct ceph_inode_frag *frag; |
| @@ -191,7 +191,6 @@ u32 ceph_choose_frag(struct ceph_inode_info *ci, u32 v, | |||
| 191 | if (found) | 191 | if (found) |
| 192 | *found = 0; | 192 | *found = 0; |
| 193 | 193 | ||
| 194 | mutex_lock(&ci->i_fragtree_mutex); | ||
| 195 | while (1) { | 194 | while (1) { |
| 196 | WARN_ON(!ceph_frag_contains_value(t, v)); | 195 | WARN_ON(!ceph_frag_contains_value(t, v)); |
| 197 | frag = __ceph_find_frag(ci, t); | 196 | frag = __ceph_find_frag(ci, t); |
| @@ -220,10 +219,19 @@ u32 ceph_choose_frag(struct ceph_inode_info *ci, u32 v, | |||
| 220 | } | 219 | } |
| 221 | dout("choose_frag(%x) = %x\n", v, t); | 220 | dout("choose_frag(%x) = %x\n", v, t); |
| 222 | 221 | ||
| 223 | mutex_unlock(&ci->i_fragtree_mutex); | ||
| 224 | return t; | 222 | return t; |
| 225 | } | 223 | } |
| 226 | 224 | ||
| 225 | u32 ceph_choose_frag(struct ceph_inode_info *ci, u32 v, | ||
| 226 | struct ceph_inode_frag *pfrag, int *found) | ||
| 227 | { | ||
| 228 | u32 ret; | ||
| 229 | mutex_lock(&ci->i_fragtree_mutex); | ||
| 230 | ret = __ceph_choose_frag(ci, v, pfrag, found); | ||
| 231 | mutex_unlock(&ci->i_fragtree_mutex); | ||
| 232 | return ret; | ||
| 233 | } | ||
| 234 | |||
| 227 | /* | 235 | /* |
| 228 | * Process dirfrag (delegation) info from the mds. Include leaf | 236 | * Process dirfrag (delegation) info from the mds. Include leaf |
| 229 | * fragment in tree ONLY if ndist > 0. Otherwise, only | 237 | * fragment in tree ONLY if ndist > 0. Otherwise, only |
| @@ -237,11 +245,17 @@ static int ceph_fill_dirfrag(struct inode *inode, | |||
| 237 | u32 id = le32_to_cpu(dirinfo->frag); | 245 | u32 id = le32_to_cpu(dirinfo->frag); |
| 238 | int mds = le32_to_cpu(dirinfo->auth); | 246 | int mds = le32_to_cpu(dirinfo->auth); |
| 239 | int ndist = le32_to_cpu(dirinfo->ndist); | 247 | int ndist = le32_to_cpu(dirinfo->ndist); |
| 248 | int diri_auth = -1; | ||
| 240 | int i; | 249 | int i; |
| 241 | int err = 0; | 250 | int err = 0; |
| 242 | 251 | ||
| 252 | spin_lock(&ci->i_ceph_lock); | ||
| 253 | if (ci->i_auth_cap) | ||
| 254 | diri_auth = ci->i_auth_cap->mds; | ||
| 255 | spin_unlock(&ci->i_ceph_lock); | ||
| 256 | |||
| 243 | mutex_lock(&ci->i_fragtree_mutex); | 257 | mutex_lock(&ci->i_fragtree_mutex); |
| 244 | if (ndist == 0) { | 258 | if (ndist == 0 && mds == diri_auth) { |
| 245 | /* no delegation info needed. */ | 259 | /* no delegation info needed. */ |
| 246 | frag = __ceph_find_frag(ci, id); | 260 | frag = __ceph_find_frag(ci, id); |
| 247 | if (!frag) | 261 | if (!frag) |
| @@ -286,6 +300,75 @@ out: | |||
| 286 | return err; | 300 | return err; |
| 287 | } | 301 | } |
| 288 | 302 | ||
| 303 | static int ceph_fill_fragtree(struct inode *inode, | ||
| 304 | struct ceph_frag_tree_head *fragtree, | ||
| 305 | struct ceph_mds_reply_dirfrag *dirinfo) | ||
| 306 | { | ||
| 307 | struct ceph_inode_info *ci = ceph_inode(inode); | ||
| 308 | struct ceph_inode_frag *frag; | ||
| 309 | struct rb_node *rb_node; | ||
| 310 | int i; | ||
| 311 | u32 id, nsplits; | ||
| 312 | bool update = false; | ||
| 313 | |||
| 314 | mutex_lock(&ci->i_fragtree_mutex); | ||
| 315 | nsplits = le32_to_cpu(fragtree->nsplits); | ||
| 316 | if (nsplits) { | ||
| 317 | i = prandom_u32() % nsplits; | ||
| 318 | id = le32_to_cpu(fragtree->splits[i].frag); | ||
| 319 | if (!__ceph_find_frag(ci, id)) | ||
| 320 | update = true; | ||
| 321 | } else if (!RB_EMPTY_ROOT(&ci->i_fragtree)) { | ||
| 322 | rb_node = rb_first(&ci->i_fragtree); | ||
| 323 | frag = rb_entry(rb_node, struct ceph_inode_frag, node); | ||
| 324 | if (frag->frag != ceph_frag_make(0, 0) || rb_next(rb_node)) | ||
| 325 | update = true; | ||
| 326 | } | ||
| 327 | if (!update && dirinfo) { | ||
| 328 | id = le32_to_cpu(dirinfo->frag); | ||
| 329 | if (id != __ceph_choose_frag(ci, id, NULL, NULL)) | ||
| 330 | update = true; | ||
| 331 | } | ||
| 332 | if (!update) | ||
| 333 | goto out_unlock; | ||
| 334 | |||
| 335 | dout("fill_fragtree %llx.%llx\n", ceph_vinop(inode)); | ||
| 336 | rb_node = rb_first(&ci->i_fragtree); | ||
| 337 | for (i = 0; i < nsplits; i++) { | ||
| 338 | id = le32_to_cpu(fragtree->splits[i].frag); | ||
| 339 | frag = NULL; | ||
| 340 | while (rb_node) { | ||
| 341 | frag = rb_entry(rb_node, struct ceph_inode_frag, node); | ||
| 342 | if (ceph_frag_compare(frag->frag, id) >= 0) { | ||
| 343 | if (frag->frag != id) | ||
| 344 | frag = NULL; | ||
| 345 | else | ||
| 346 | rb_node = rb_next(rb_node); | ||
| 347 | break; | ||
| 348 | } | ||
| 349 | rb_node = rb_next(rb_node); | ||
| 350 | rb_erase(&frag->node, &ci->i_fragtree); | ||
| 351 | kfree(frag); | ||
| 352 | frag = NULL; | ||
| 353 | } | ||
| 354 | if (!frag) { | ||
| 355 | frag = __get_or_create_frag(ci, id); | ||
| 356 | if (IS_ERR(frag)) | ||
| 357 | continue; | ||
| 358 | } | ||
| 359 | frag->split_by = le32_to_cpu(fragtree->splits[i].by); | ||
| 360 | dout(" frag %x split by %d\n", frag->frag, frag->split_by); | ||
| 361 | } | ||
| 362 | while (rb_node) { | ||
| 363 | frag = rb_entry(rb_node, struct ceph_inode_frag, node); | ||
| 364 | rb_node = rb_next(rb_node); | ||
| 365 | rb_erase(&frag->node, &ci->i_fragtree); | ||
| 366 | kfree(frag); | ||
| 367 | } | ||
| 368 | out_unlock: | ||
| 369 | mutex_unlock(&ci->i_fragtree_mutex); | ||
| 370 | return 0; | ||
| 371 | } | ||
| 289 | 372 | ||
| 290 | /* | 373 | /* |
| 291 | * initialize a newly allocated inode. | 374 | * initialize a newly allocated inode. |
| @@ -341,7 +424,6 @@ struct inode *ceph_alloc_inode(struct super_block *sb) | |||
| 341 | INIT_LIST_HEAD(&ci->i_cap_snaps); | 424 | INIT_LIST_HEAD(&ci->i_cap_snaps); |
| 342 | ci->i_head_snapc = NULL; | 425 | ci->i_head_snapc = NULL; |
| 343 | ci->i_snap_caps = 0; | 426 | ci->i_snap_caps = 0; |
| 344 | ci->i_cap_exporting_issued = 0; | ||
| 345 | 427 | ||
| 346 | for (i = 0; i < CEPH_FILE_MODE_NUM; i++) | 428 | for (i = 0; i < CEPH_FILE_MODE_NUM; i++) |
| 347 | ci->i_nr_by_mode[i] = 0; | 429 | ci->i_nr_by_mode[i] = 0; |
| @@ -407,7 +489,7 @@ void ceph_destroy_inode(struct inode *inode) | |||
| 407 | 489 | ||
| 408 | /* | 490 | /* |
| 409 | * we may still have a snap_realm reference if there are stray | 491 | * we may still have a snap_realm reference if there are stray |
| 410 | * caps in i_cap_exporting_issued or i_snap_caps. | 492 | * caps in i_snap_caps. |
| 411 | */ | 493 | */ |
| 412 | if (ci->i_snap_realm) { | 494 | if (ci->i_snap_realm) { |
| 413 | struct ceph_mds_client *mdsc = | 495 | struct ceph_mds_client *mdsc = |
| @@ -582,22 +664,26 @@ static int fill_inode(struct inode *inode, | |||
| 582 | unsigned long ttl_from, int cap_fmode, | 664 | unsigned long ttl_from, int cap_fmode, |
| 583 | struct ceph_cap_reservation *caps_reservation) | 665 | struct ceph_cap_reservation *caps_reservation) |
| 584 | { | 666 | { |
| 667 | struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; | ||
| 585 | struct ceph_mds_reply_inode *info = iinfo->in; | 668 | struct ceph_mds_reply_inode *info = iinfo->in; |
| 586 | struct ceph_inode_info *ci = ceph_inode(inode); | 669 | struct ceph_inode_info *ci = ceph_inode(inode); |
| 587 | int i; | 670 | int issued = 0, implemented, new_issued; |
| 588 | int issued = 0, implemented; | ||
| 589 | struct timespec mtime, atime, ctime; | 671 | struct timespec mtime, atime, ctime; |
| 590 | u32 nsplits; | ||
| 591 | struct ceph_inode_frag *frag; | ||
| 592 | struct rb_node *rb_node; | ||
| 593 | struct ceph_buffer *xattr_blob = NULL; | 672 | struct ceph_buffer *xattr_blob = NULL; |
| 673 | struct ceph_cap *new_cap = NULL; | ||
| 594 | int err = 0; | 674 | int err = 0; |
| 595 | int queue_trunc = 0; | 675 | bool wake = false; |
| 676 | bool queue_trunc = false; | ||
| 677 | bool new_version = false; | ||
| 596 | 678 | ||
| 597 | dout("fill_inode %p ino %llx.%llx v %llu had %llu\n", | 679 | dout("fill_inode %p ino %llx.%llx v %llu had %llu\n", |
| 598 | inode, ceph_vinop(inode), le64_to_cpu(info->version), | 680 | inode, ceph_vinop(inode), le64_to_cpu(info->version), |
| 599 | ci->i_version); | 681 | ci->i_version); |
| 600 | 682 | ||
| 683 | /* prealloc new cap struct */ | ||
| 684 | if (info->cap.caps && ceph_snap(inode) == CEPH_NOSNAP) | ||
| 685 | new_cap = ceph_get_cap(mdsc, caps_reservation); | ||
| 686 | |||
| 601 | /* | 687 | /* |
| 602 | * prealloc xattr data, if it looks like we'll need it. only | 688 | * prealloc xattr data, if it looks like we'll need it. only |
| 603 | * if len > 4 (meaning there are actually xattrs; the first 4 | 689 | * if len > 4 (meaning there are actually xattrs; the first 4 |
| @@ -623,19 +709,23 @@ static int fill_inode(struct inode *inode, | |||
| 623 | * 3 2 skip | 709 | * 3 2 skip |
| 624 | * 3 3 update | 710 | * 3 3 update |
| 625 | */ | 711 | */ |
| 626 | if (le64_to_cpu(info->version) > 0 && | 712 | if (ci->i_version == 0 || |
| 627 | (ci->i_version & ~1) >= le64_to_cpu(info->version)) | 713 | ((info->cap.flags & CEPH_CAP_FLAG_AUTH) && |
| 628 | goto no_change; | 714 | le64_to_cpu(info->version) > (ci->i_version & ~1))) |
| 629 | 715 | new_version = true; | |
| 716 | |||
| 630 | issued = __ceph_caps_issued(ci, &implemented); | 717 | issued = __ceph_caps_issued(ci, &implemented); |
| 631 | issued |= implemented | __ceph_caps_dirty(ci); | 718 | issued |= implemented | __ceph_caps_dirty(ci); |
| 719 | new_issued = ~issued & le32_to_cpu(info->cap.caps); | ||
| 632 | 720 | ||
| 633 | /* update inode */ | 721 | /* update inode */ |
| 634 | ci->i_version = le64_to_cpu(info->version); | 722 | ci->i_version = le64_to_cpu(info->version); |
| 635 | inode->i_version++; | 723 | inode->i_version++; |
| 636 | inode->i_rdev = le32_to_cpu(info->rdev); | 724 | inode->i_rdev = le32_to_cpu(info->rdev); |
| 725 | inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1; | ||
| 637 | 726 | ||
| 638 | if ((issued & CEPH_CAP_AUTH_EXCL) == 0) { | 727 | if ((new_version || (new_issued & CEPH_CAP_AUTH_SHARED)) && |
| 728 | (issued & CEPH_CAP_AUTH_EXCL) == 0) { | ||
| 639 | inode->i_mode = le32_to_cpu(info->mode); | 729 | inode->i_mode = le32_to_cpu(info->mode); |
| 640 | inode->i_uid = make_kuid(&init_user_ns, le32_to_cpu(info->uid)); | 730 | inode->i_uid = make_kuid(&init_user_ns, le32_to_cpu(info->uid)); |
| 641 | inode->i_gid = make_kgid(&init_user_ns, le32_to_cpu(info->gid)); | 731 | inode->i_gid = make_kgid(&init_user_ns, le32_to_cpu(info->gid)); |
| @@ -644,23 +734,35 @@ static int fill_inode(struct inode *inode, | |||
| 644 | from_kgid(&init_user_ns, inode->i_gid)); | 734 | from_kgid(&init_user_ns, inode->i_gid)); |
| 645 | } | 735 | } |
| 646 | 736 | ||
| 647 | if ((issued & CEPH_CAP_LINK_EXCL) == 0) | 737 | if ((new_version || (new_issued & CEPH_CAP_LINK_SHARED)) && |
| 738 | (issued & CEPH_CAP_LINK_EXCL) == 0) | ||
| 648 | set_nlink(inode, le32_to_cpu(info->nlink)); | 739 | set_nlink(inode, le32_to_cpu(info->nlink)); |
| 649 | 740 | ||
| 650 | /* be careful with mtime, atime, size */ | 741 | if (new_version || (new_issued & CEPH_CAP_ANY_RD)) { |
| 651 | ceph_decode_timespec(&atime, &info->atime); | 742 | /* be careful with mtime, atime, size */ |
| 652 | ceph_decode_timespec(&mtime, &info->mtime); | 743 | ceph_decode_timespec(&atime, &info->atime); |
| 653 | ceph_decode_timespec(&ctime, &info->ctime); | 744 | ceph_decode_timespec(&mtime, &info->mtime); |
| 654 | queue_trunc = ceph_fill_file_size(inode, issued, | 745 | ceph_decode_timespec(&ctime, &info->ctime); |
| 655 | le32_to_cpu(info->truncate_seq), | 746 | ceph_fill_file_time(inode, issued, |
| 656 | le64_to_cpu(info->truncate_size), | 747 | le32_to_cpu(info->time_warp_seq), |
| 657 | le64_to_cpu(info->size)); | 748 | &ctime, &mtime, &atime); |
| 658 | ceph_fill_file_time(inode, issued, | 749 | } |
| 659 | le32_to_cpu(info->time_warp_seq), | 750 | |
| 660 | &ctime, &mtime, &atime); | 751 | if (new_version || |
| 661 | 752 | (new_issued & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR))) { | |
| 662 | ci->i_layout = info->layout; | 753 | ci->i_layout = info->layout; |
| 663 | inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1; | 754 | queue_trunc = ceph_fill_file_size(inode, issued, |
| 755 | le32_to_cpu(info->truncate_seq), | ||
| 756 | le64_to_cpu(info->truncate_size), | ||
| 757 | le64_to_cpu(info->size)); | ||
| 758 | /* only update max_size on auth cap */ | ||
| 759 | if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) && | ||
| 760 | ci->i_max_size != le64_to_cpu(info->max_size)) { | ||
| 761 | dout("max_size %lld -> %llu\n", ci->i_max_size, | ||
| 762 | le64_to_cpu(info->max_size)); | ||
| 763 | ci->i_max_size = le64_to_cpu(info->max_size); | ||
| 764 | } | ||
| 765 | } | ||
| 664 | 766 | ||
| 665 | /* xattrs */ | 767 | /* xattrs */ |
| 666 | /* note that if i_xattrs.len <= 4, i_xattrs.data will still be NULL. */ | 768 | /* note that if i_xattrs.len <= 4, i_xattrs.data will still be NULL. */ |
| @@ -745,58 +847,6 @@ static int fill_inode(struct inode *inode, | |||
| 745 | dout(" marking %p complete (empty)\n", inode); | 847 | dout(" marking %p complete (empty)\n", inode); |
| 746 | __ceph_dir_set_complete(ci, atomic_read(&ci->i_release_count)); | 848 | __ceph_dir_set_complete(ci, atomic_read(&ci->i_release_count)); |
| 747 | } | 849 | } |
| 748 | no_change: | ||
| 749 | /* only update max_size on auth cap */ | ||
| 750 | if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) && | ||
| 751 | ci->i_max_size != le64_to_cpu(info->max_size)) { | ||
| 752 | dout("max_size %lld -> %llu\n", ci->i_max_size, | ||
| 753 | le64_to_cpu(info->max_size)); | ||
| 754 | ci->i_max_size = le64_to_cpu(info->max_size); | ||
| 755 | } | ||
| 756 | |||
| 757 | spin_unlock(&ci->i_ceph_lock); | ||
| 758 | |||
| 759 | /* queue truncate if we saw i_size decrease */ | ||
| 760 | if (queue_trunc) | ||
| 761 | ceph_queue_vmtruncate(inode); | ||
| 762 | |||
| 763 | /* populate frag tree */ | ||
| 764 | /* FIXME: move me up, if/when version reflects fragtree changes */ | ||
| 765 | nsplits = le32_to_cpu(info->fragtree.nsplits); | ||
| 766 | mutex_lock(&ci->i_fragtree_mutex); | ||
| 767 | rb_node = rb_first(&ci->i_fragtree); | ||
| 768 | for (i = 0; i < nsplits; i++) { | ||
| 769 | u32 id = le32_to_cpu(info->fragtree.splits[i].frag); | ||
| 770 | frag = NULL; | ||
| 771 | while (rb_node) { | ||
| 772 | frag = rb_entry(rb_node, struct ceph_inode_frag, node); | ||
| 773 | if (ceph_frag_compare(frag->frag, id) >= 0) { | ||
| 774 | if (frag->frag != id) | ||
| 775 | frag = NULL; | ||
| 776 | else | ||
| 777 | rb_node = rb_next(rb_node); | ||
| 778 | break; | ||
| 779 | } | ||
| 780 | rb_node = rb_next(rb_node); | ||
| 781 | rb_erase(&frag->node, &ci->i_fragtree); | ||
| 782 | kfree(frag); | ||
| 783 | frag = NULL; | ||
| 784 | } | ||
| 785 | if (!frag) { | ||
| 786 | frag = __get_or_create_frag(ci, id); | ||
| 787 | if (IS_ERR(frag)) | ||
| 788 | continue; | ||
| 789 | } | ||
| 790 | frag->split_by = le32_to_cpu(info->fragtree.splits[i].by); | ||
| 791 | dout(" frag %x split by %d\n", frag->frag, frag->split_by); | ||
| 792 | } | ||
| 793 | while (rb_node) { | ||
| 794 | frag = rb_entry(rb_node, struct ceph_inode_frag, node); | ||
| 795 | rb_node = rb_next(rb_node); | ||
| 796 | rb_erase(&frag->node, &ci->i_fragtree); | ||
| 797 | kfree(frag); | ||
| 798 | } | ||
| 799 | mutex_unlock(&ci->i_fragtree_mutex); | ||
| 800 | 850 | ||
| 801 | /* were we issued a capability? */ | 851 | /* were we issued a capability? */ |
| 802 | if (info->cap.caps) { | 852 | if (info->cap.caps) { |
| @@ -809,30 +859,41 @@ no_change: | |||
| 809 | le32_to_cpu(info->cap.seq), | 859 | le32_to_cpu(info->cap.seq), |
| 810 | le32_to_cpu(info->cap.mseq), | 860 | le32_to_cpu(info->cap.mseq), |
| 811 | le64_to_cpu(info->cap.realm), | 861 | le64_to_cpu(info->cap.realm), |
| 812 | info->cap.flags, | 862 | info->cap.flags, &new_cap); |
| 813 | caps_reservation); | 863 | wake = true; |
| 814 | } else { | 864 | } else { |
| 815 | spin_lock(&ci->i_ceph_lock); | ||
| 816 | dout(" %p got snap_caps %s\n", inode, | 865 | dout(" %p got snap_caps %s\n", inode, |
| 817 | ceph_cap_string(le32_to_cpu(info->cap.caps))); | 866 | ceph_cap_string(le32_to_cpu(info->cap.caps))); |
| 818 | ci->i_snap_caps |= le32_to_cpu(info->cap.caps); | 867 | ci->i_snap_caps |= le32_to_cpu(info->cap.caps); |
| 819 | if (cap_fmode >= 0) | 868 | if (cap_fmode >= 0) |
| 820 | __ceph_get_fmode(ci, cap_fmode); | 869 | __ceph_get_fmode(ci, cap_fmode); |
| 821 | spin_unlock(&ci->i_ceph_lock); | ||
| 822 | } | 870 | } |
| 823 | } else if (cap_fmode >= 0) { | 871 | } else if (cap_fmode >= 0) { |
| 824 | pr_warn("mds issued no caps on %llx.%llx\n", | 872 | pr_warn("mds issued no caps on %llx.%llx\n", |
| 825 | ceph_vinop(inode)); | 873 | ceph_vinop(inode)); |
| 826 | __ceph_get_fmode(ci, cap_fmode); | 874 | __ceph_get_fmode(ci, cap_fmode); |
| 827 | } | 875 | } |
| 876 | spin_unlock(&ci->i_ceph_lock); | ||
| 877 | |||
| 878 | if (wake) | ||
| 879 | wake_up_all(&ci->i_cap_wq); | ||
| 880 | |||
| 881 | /* queue truncate if we saw i_size decrease */ | ||
| 882 | if (queue_trunc) | ||
| 883 | ceph_queue_vmtruncate(inode); | ||
| 884 | |||
| 885 | /* populate frag tree */ | ||
| 886 | if (S_ISDIR(inode->i_mode)) | ||
| 887 | ceph_fill_fragtree(inode, &info->fragtree, dirinfo); | ||
| 828 | 888 | ||
| 829 | /* update delegation info? */ | 889 | /* update delegation info? */ |
| 830 | if (dirinfo) | 890 | if (dirinfo) |
| 831 | ceph_fill_dirfrag(inode, dirinfo); | 891 | ceph_fill_dirfrag(inode, dirinfo); |
| 832 | 892 | ||
| 833 | err = 0; | 893 | err = 0; |
| 834 | |||
| 835 | out: | 894 | out: |
| 895 | if (new_cap) | ||
| 896 | ceph_put_cap(mdsc, new_cap); | ||
| 836 | if (xattr_blob) | 897 | if (xattr_blob) |
| 837 | ceph_buffer_put(xattr_blob); | 898 | ceph_buffer_put(xattr_blob); |
| 838 | return err; | 899 | return err; |
| @@ -1485,7 +1546,7 @@ static void ceph_invalidate_work(struct work_struct *work) | |||
| 1485 | orig_gen = ci->i_rdcache_gen; | 1546 | orig_gen = ci->i_rdcache_gen; |
| 1486 | spin_unlock(&ci->i_ceph_lock); | 1547 | spin_unlock(&ci->i_ceph_lock); |
| 1487 | 1548 | ||
| 1488 | truncate_inode_pages(inode->i_mapping, 0); | 1549 | truncate_pagecache(inode, 0); |
| 1489 | 1550 | ||
| 1490 | spin_lock(&ci->i_ceph_lock); | 1551 | spin_lock(&ci->i_ceph_lock); |
| 1491 | if (orig_gen == ci->i_rdcache_gen && | 1552 | if (orig_gen == ci->i_rdcache_gen && |
| @@ -1588,7 +1649,7 @@ retry: | |||
| 1588 | ci->i_truncate_pending, to); | 1649 | ci->i_truncate_pending, to); |
| 1589 | spin_unlock(&ci->i_ceph_lock); | 1650 | spin_unlock(&ci->i_ceph_lock); |
| 1590 | 1651 | ||
| 1591 | truncate_inode_pages(inode->i_mapping, to); | 1652 | truncate_pagecache(inode, to); |
| 1592 | 1653 | ||
| 1593 | spin_lock(&ci->i_ceph_lock); | 1654 | spin_lock(&ci->i_ceph_lock); |
| 1594 | if (to == ci->i_truncate_size) { | 1655 | if (to == ci->i_truncate_size) { |
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 9a33b98cb000..92a2548278fc 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c | |||
| @@ -1558,6 +1558,8 @@ ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode) | |||
| 1558 | init_completion(&req->r_safe_completion); | 1558 | init_completion(&req->r_safe_completion); |
| 1559 | INIT_LIST_HEAD(&req->r_unsafe_item); | 1559 | INIT_LIST_HEAD(&req->r_unsafe_item); |
| 1560 | 1560 | ||
| 1561 | req->r_stamp = CURRENT_TIME; | ||
| 1562 | |||
| 1561 | req->r_op = op; | 1563 | req->r_op = op; |
| 1562 | req->r_direct_mode = mode; | 1564 | req->r_direct_mode = mode; |
| 1563 | return req; | 1565 | return req; |
| @@ -1783,7 +1785,8 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, | |||
| 1783 | } | 1785 | } |
| 1784 | 1786 | ||
| 1785 | len = sizeof(*head) + | 1787 | len = sizeof(*head) + |
| 1786 | pathlen1 + pathlen2 + 2*(1 + sizeof(u32) + sizeof(u64)); | 1788 | pathlen1 + pathlen2 + 2*(1 + sizeof(u32) + sizeof(u64)) + |
| 1789 | sizeof(struct timespec); | ||
| 1787 | 1790 | ||
| 1788 | /* calculate (max) length for cap releases */ | 1791 | /* calculate (max) length for cap releases */ |
| 1789 | len += sizeof(struct ceph_mds_request_release) * | 1792 | len += sizeof(struct ceph_mds_request_release) * |
| @@ -1800,6 +1803,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, | |||
| 1800 | goto out_free2; | 1803 | goto out_free2; |
| 1801 | } | 1804 | } |
| 1802 | 1805 | ||
| 1806 | msg->hdr.version = 2; | ||
| 1803 | msg->hdr.tid = cpu_to_le64(req->r_tid); | 1807 | msg->hdr.tid = cpu_to_le64(req->r_tid); |
| 1804 | 1808 | ||
| 1805 | head = msg->front.iov_base; | 1809 | head = msg->front.iov_base; |
| @@ -1836,6 +1840,9 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, | |||
| 1836 | mds, req->r_old_inode_drop, req->r_old_inode_unless, 0); | 1840 | mds, req->r_old_inode_drop, req->r_old_inode_unless, 0); |
| 1837 | head->num_releases = cpu_to_le16(releases); | 1841 | head->num_releases = cpu_to_le16(releases); |
| 1838 | 1842 | ||
| 1843 | /* time stamp */ | ||
| 1844 | ceph_encode_copy(&p, &req->r_stamp, sizeof(req->r_stamp)); | ||
| 1845 | |||
| 1839 | BUG_ON(p > end); | 1846 | BUG_ON(p > end); |
| 1840 | msg->front.iov_len = p - msg->front.iov_base; | 1847 | msg->front.iov_len = p - msg->front.iov_base; |
| 1841 | msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); | 1848 | msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); |
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index e90cfccf93bd..e00737cf523c 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h | |||
| @@ -194,6 +194,7 @@ struct ceph_mds_request { | |||
| 194 | int r_fmode; /* file mode, if expecting cap */ | 194 | int r_fmode; /* file mode, if expecting cap */ |
| 195 | kuid_t r_uid; | 195 | kuid_t r_uid; |
| 196 | kgid_t r_gid; | 196 | kgid_t r_gid; |
| 197 | struct timespec r_stamp; | ||
| 197 | 198 | ||
| 198 | /* for choosing which mds to send this request to */ | 199 | /* for choosing which mds to send this request to */ |
| 199 | int r_direct_mode; | 200 | int r_direct_mode; |
diff --git a/fs/ceph/super.h b/fs/ceph/super.h index ead05cc1f447..12b20744e386 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h | |||
| @@ -292,7 +292,6 @@ struct ceph_inode_info { | |||
| 292 | struct ceph_snap_context *i_head_snapc; /* set if wr_buffer_head > 0 or | 292 | struct ceph_snap_context *i_head_snapc; /* set if wr_buffer_head > 0 or |
| 293 | dirty|flushing caps */ | 293 | dirty|flushing caps */ |
| 294 | unsigned i_snap_caps; /* cap bits for snapped files */ | 294 | unsigned i_snap_caps; /* cap bits for snapped files */ |
| 295 | unsigned i_cap_exporting_issued; | ||
| 296 | 295 | ||
| 297 | int i_nr_by_mode[CEPH_FILE_MODE_NUM]; /* open file counts */ | 296 | int i_nr_by_mode[CEPH_FILE_MODE_NUM]; /* open file counts */ |
| 298 | 297 | ||
| @@ -775,11 +774,13 @@ static inline void ceph_forget_all_cached_acls(struct inode *inode) | |||
| 775 | extern const char *ceph_cap_string(int c); | 774 | extern const char *ceph_cap_string(int c); |
| 776 | extern void ceph_handle_caps(struct ceph_mds_session *session, | 775 | extern void ceph_handle_caps(struct ceph_mds_session *session, |
| 777 | struct ceph_msg *msg); | 776 | struct ceph_msg *msg); |
| 778 | extern int ceph_add_cap(struct inode *inode, | 777 | extern struct ceph_cap *ceph_get_cap(struct ceph_mds_client *mdsc, |
| 779 | struct ceph_mds_session *session, u64 cap_id, | 778 | struct ceph_cap_reservation *ctx); |
| 780 | int fmode, unsigned issued, unsigned wanted, | 779 | extern void ceph_add_cap(struct inode *inode, |
| 781 | unsigned cap, unsigned seq, u64 realmino, int flags, | 780 | struct ceph_mds_session *session, u64 cap_id, |
| 782 | struct ceph_cap_reservation *caps_reservation); | 781 | int fmode, unsigned issued, unsigned wanted, |
| 782 | unsigned cap, unsigned seq, u64 realmino, int flags, | ||
| 783 | struct ceph_cap **new_cap); | ||
| 783 | extern void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release); | 784 | extern void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release); |
| 784 | extern void ceph_put_cap(struct ceph_mds_client *mdsc, | 785 | extern void ceph_put_cap(struct ceph_mds_client *mdsc, |
| 785 | struct ceph_cap *cap); | 786 | struct ceph_cap *cap); |
diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c index 0227b45ef00a..15e9505aa35f 100644 --- a/fs/cifs/cifs_unicode.c +++ b/fs/cifs/cifs_unicode.c | |||
| @@ -290,7 +290,8 @@ int | |||
| 290 | cifsConvertToUTF16(__le16 *target, const char *source, int srclen, | 290 | cifsConvertToUTF16(__le16 *target, const char *source, int srclen, |
| 291 | const struct nls_table *cp, int mapChars) | 291 | const struct nls_table *cp, int mapChars) |
| 292 | { | 292 | { |
| 293 | int i, j, charlen; | 293 | int i, charlen; |
| 294 | int j = 0; | ||
| 294 | char src_char; | 295 | char src_char; |
| 295 | __le16 dst_char; | 296 | __le16 dst_char; |
| 296 | wchar_t tmp; | 297 | wchar_t tmp; |
| @@ -298,12 +299,11 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen, | |||
| 298 | if (!mapChars) | 299 | if (!mapChars) |
| 299 | return cifs_strtoUTF16(target, source, PATH_MAX, cp); | 300 | return cifs_strtoUTF16(target, source, PATH_MAX, cp); |
| 300 | 301 | ||
| 301 | for (i = 0, j = 0; i < srclen; j++) { | 302 | for (i = 0; i < srclen; j++) { |
| 302 | src_char = source[i]; | 303 | src_char = source[i]; |
| 303 | charlen = 1; | 304 | charlen = 1; |
| 304 | switch (src_char) { | 305 | switch (src_char) { |
| 305 | case 0: | 306 | case 0: |
| 306 | put_unaligned(0, &target[j]); | ||
| 307 | goto ctoUTF16_out; | 307 | goto ctoUTF16_out; |
| 308 | case ':': | 308 | case ':': |
| 309 | dst_char = cpu_to_le16(UNI_COLON); | 309 | dst_char = cpu_to_le16(UNI_COLON); |
| @@ -350,6 +350,7 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen, | |||
| 350 | } | 350 | } |
| 351 | 351 | ||
| 352 | ctoUTF16_out: | 352 | ctoUTF16_out: |
| 353 | put_unaligned(0, &target[j]); /* Null terminate target unicode string */ | ||
| 353 | return j; | 354 | return j; |
| 354 | } | 355 | } |
| 355 | 356 | ||
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 6aaa8112c538..888398067420 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c | |||
| @@ -725,8 +725,20 @@ out_nls: | |||
| 725 | goto out; | 725 | goto out; |
| 726 | } | 726 | } |
| 727 | 727 | ||
| 728 | static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, | 728 | static ssize_t |
| 729 | unsigned long nr_segs, loff_t pos) | 729 | cifs_loose_read_iter(struct kiocb *iocb, struct iov_iter *iter) |
| 730 | { | ||
| 731 | ssize_t rc; | ||
| 732 | struct inode *inode = file_inode(iocb->ki_filp); | ||
| 733 | |||
| 734 | rc = cifs_revalidate_mapping(inode); | ||
| 735 | if (rc) | ||
| 736 | return rc; | ||
| 737 | |||
| 738 | return generic_file_read_iter(iocb, iter); | ||
| 739 | } | ||
| 740 | |||
| 741 | static ssize_t cifs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) | ||
| 730 | { | 742 | { |
| 731 | struct inode *inode = file_inode(iocb->ki_filp); | 743 | struct inode *inode = file_inode(iocb->ki_filp); |
| 732 | struct cifsInodeInfo *cinode = CIFS_I(inode); | 744 | struct cifsInodeInfo *cinode = CIFS_I(inode); |
| @@ -737,14 +749,14 @@ static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
| 737 | if (written) | 749 | if (written) |
| 738 | return written; | 750 | return written; |
| 739 | 751 | ||
| 740 | written = generic_file_aio_write(iocb, iov, nr_segs, pos); | 752 | written = generic_file_write_iter(iocb, from); |
| 741 | 753 | ||
| 742 | if (CIFS_CACHE_WRITE(CIFS_I(inode))) | 754 | if (CIFS_CACHE_WRITE(CIFS_I(inode))) |
| 743 | goto out; | 755 | goto out; |
| 744 | 756 | ||
| 745 | rc = filemap_fdatawrite(inode->i_mapping); | 757 | rc = filemap_fdatawrite(inode->i_mapping); |
| 746 | if (rc) | 758 | if (rc) |
| 747 | cifs_dbg(FYI, "cifs_file_aio_write: %d rc on %p inode\n", | 759 | cifs_dbg(FYI, "cifs_file_write_iter: %d rc on %p inode\n", |
| 748 | rc, inode); | 760 | rc, inode); |
| 749 | 761 | ||
| 750 | out: | 762 | out: |
| @@ -880,10 +892,10 @@ const struct inode_operations cifs_symlink_inode_ops = { | |||
| 880 | }; | 892 | }; |
| 881 | 893 | ||
| 882 | const struct file_operations cifs_file_ops = { | 894 | const struct file_operations cifs_file_ops = { |
| 883 | .read = do_sync_read, | 895 | .read = new_sync_read, |
| 884 | .write = do_sync_write, | 896 | .write = new_sync_write, |
| 885 | .aio_read = generic_file_aio_read, | 897 | .read_iter = cifs_loose_read_iter, |
| 886 | .aio_write = cifs_file_aio_write, | 898 | .write_iter = cifs_file_write_iter, |
| 887 | .open = cifs_open, | 899 | .open = cifs_open, |
| 888 | .release = cifs_close, | 900 | .release = cifs_close, |
| 889 | .lock = cifs_lock, | 901 | .lock = cifs_lock, |
| @@ -899,10 +911,10 @@ const struct file_operations cifs_file_ops = { | |||
| 899 | }; | 911 | }; |
| 900 | 912 | ||
| 901 | const struct file_operations cifs_file_strict_ops = { | 913 | const struct file_operations cifs_file_strict_ops = { |
| 902 | .read = do_sync_read, | 914 | .read = new_sync_read, |
| 903 | .write = do_sync_write, | 915 | .write = new_sync_write, |
| 904 | .aio_read = cifs_strict_readv, | 916 | .read_iter = cifs_strict_readv, |
| 905 | .aio_write = cifs_strict_writev, | 917 | .write_iter = cifs_strict_writev, |
| 906 | .open = cifs_open, | 918 | .open = cifs_open, |
| 907 | .release = cifs_close, | 919 | .release = cifs_close, |
| 908 | .lock = cifs_lock, | 920 | .lock = cifs_lock, |
| @@ -919,10 +931,10 @@ const struct file_operations cifs_file_strict_ops = { | |||
| 919 | 931 | ||
| 920 | const struct file_operations cifs_file_direct_ops = { | 932 | const struct file_operations cifs_file_direct_ops = { |
| 921 | /* BB reevaluate whether they can be done with directio, no cache */ | 933 | /* BB reevaluate whether they can be done with directio, no cache */ |
| 922 | .read = do_sync_read, | 934 | .read = new_sync_read, |
| 923 | .write = do_sync_write, | 935 | .write = new_sync_write, |
| 924 | .aio_read = cifs_user_readv, | 936 | .read_iter = cifs_user_readv, |
| 925 | .aio_write = cifs_user_writev, | 937 | .write_iter = cifs_user_writev, |
| 926 | .open = cifs_open, | 938 | .open = cifs_open, |
| 927 | .release = cifs_close, | 939 | .release = cifs_close, |
| 928 | .lock = cifs_lock, | 940 | .lock = cifs_lock, |
| @@ -938,10 +950,10 @@ const struct file_operations cifs_file_direct_ops = { | |||
| 938 | }; | 950 | }; |
| 939 | 951 | ||
| 940 | const struct file_operations cifs_file_nobrl_ops = { | 952 | const struct file_operations cifs_file_nobrl_ops = { |
| 941 | .read = do_sync_read, | 953 | .read = new_sync_read, |
| 942 | .write = do_sync_write, | 954 | .write = new_sync_write, |
| 943 | .aio_read = generic_file_aio_read, | 955 | .read_iter = cifs_loose_read_iter, |
| 944 | .aio_write = cifs_file_aio_write, | 956 | .write_iter = cifs_file_write_iter, |
| 945 | .open = cifs_open, | 957 | .open = cifs_open, |
| 946 | .release = cifs_close, | 958 | .release = cifs_close, |
| 947 | .fsync = cifs_fsync, | 959 | .fsync = cifs_fsync, |
| @@ -956,10 +968,10 @@ const struct file_operations cifs_file_nobrl_ops = { | |||
| 956 | }; | 968 | }; |
| 957 | 969 | ||
| 958 | const struct file_operations cifs_file_strict_nobrl_ops = { | 970 | const struct file_operations cifs_file_strict_nobrl_ops = { |
| 959 | .read = do_sync_read, | 971 | .read = new_sync_read, |
| 960 | .write = do_sync_write, | 972 | .write = new_sync_write, |
| 961 | .aio_read = cifs_strict_readv, | 973 | .read_iter = cifs_strict_readv, |
| 962 | .aio_write = cifs_strict_writev, | 974 | .write_iter = cifs_strict_writev, |
| 963 | .open = cifs_open, | 975 | .open = cifs_open, |
| 964 | .release = cifs_close, | 976 | .release = cifs_close, |
| 965 | .fsync = cifs_strict_fsync, | 977 | .fsync = cifs_strict_fsync, |
| @@ -975,10 +987,10 @@ const struct file_operations cifs_file_strict_nobrl_ops = { | |||
| 975 | 987 | ||
| 976 | const struct file_operations cifs_file_direct_nobrl_ops = { | 988 | const struct file_operations cifs_file_direct_nobrl_ops = { |
| 977 | /* BB reevaluate whether they can be done with directio, no cache */ | 989 | /* BB reevaluate whether they can be done with directio, no cache */ |
| 978 | .read = do_sync_read, | 990 | .read = new_sync_read, |
| 979 | .write = do_sync_write, | 991 | .write = new_sync_write, |
| 980 | .aio_read = cifs_user_readv, | 992 | .read_iter = cifs_user_readv, |
| 981 | .aio_write = cifs_user_writev, | 993 | .write_iter = cifs_user_writev, |
| 982 | .open = cifs_open, | 994 | .open = cifs_open, |
| 983 | .release = cifs_close, | 995 | .release = cifs_close, |
| 984 | .fsync = cifs_fsync, | 996 | .fsync = cifs_fsync, |
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 8fe51166d6e3..70f178a7c759 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h | |||
| @@ -95,14 +95,10 @@ extern const struct file_operations cifs_file_strict_nobrl_ops; | |||
| 95 | extern int cifs_open(struct inode *inode, struct file *file); | 95 | extern int cifs_open(struct inode *inode, struct file *file); |
| 96 | extern int cifs_close(struct inode *inode, struct file *file); | 96 | extern int cifs_close(struct inode *inode, struct file *file); |
| 97 | extern int cifs_closedir(struct inode *inode, struct file *file); | 97 | extern int cifs_closedir(struct inode *inode, struct file *file); |
| 98 | extern ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov, | 98 | extern ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to); |
| 99 | unsigned long nr_segs, loff_t pos); | 99 | extern ssize_t cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to); |
| 100 | extern ssize_t cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov, | 100 | extern ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from); |
| 101 | unsigned long nr_segs, loff_t pos); | 101 | extern ssize_t cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from); |
| 102 | extern ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov, | ||
| 103 | unsigned long nr_segs, loff_t pos); | ||
| 104 | extern ssize_t cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov, | ||
| 105 | unsigned long nr_segs, loff_t pos); | ||
| 106 | extern int cifs_lock(struct file *, int, struct file_lock *); | 102 | extern int cifs_lock(struct file *, int, struct file_lock *); |
| 107 | extern int cifs_fsync(struct file *, loff_t, loff_t, int); | 103 | extern int cifs_fsync(struct file *, loff_t, loff_t, int); |
| 108 | extern int cifs_strict_fsync(struct file *, loff_t, loff_t, int); | 104 | extern int cifs_strict_fsync(struct file *, loff_t, loff_t, int); |
diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 208f56eca4bf..e90a1e9aa627 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c | |||
| @@ -2385,14 +2385,12 @@ cifs_uncached_retry_writev(struct cifs_writedata *wdata) | |||
| 2385 | } | 2385 | } |
| 2386 | 2386 | ||
| 2387 | static ssize_t | 2387 | static ssize_t |
| 2388 | cifs_iovec_write(struct file *file, const struct iovec *iov, | 2388 | cifs_iovec_write(struct file *file, struct iov_iter *from, loff_t *poffset) |
| 2389 | unsigned long nr_segs, loff_t *poffset) | ||
| 2390 | { | 2389 | { |
| 2391 | unsigned long nr_pages, i; | 2390 | unsigned long nr_pages, i; |
| 2392 | size_t bytes, copied, len, cur_len; | 2391 | size_t bytes, copied, len, cur_len; |
| 2393 | ssize_t total_written = 0; | 2392 | ssize_t total_written = 0; |
| 2394 | loff_t offset; | 2393 | loff_t offset; |
| 2395 | struct iov_iter it; | ||
| 2396 | struct cifsFileInfo *open_file; | 2394 | struct cifsFileInfo *open_file; |
| 2397 | struct cifs_tcon *tcon; | 2395 | struct cifs_tcon *tcon; |
| 2398 | struct cifs_sb_info *cifs_sb; | 2396 | struct cifs_sb_info *cifs_sb; |
| @@ -2401,14 +2399,16 @@ cifs_iovec_write(struct file *file, const struct iovec *iov, | |||
| 2401 | int rc; | 2399 | int rc; |
| 2402 | pid_t pid; | 2400 | pid_t pid; |
| 2403 | 2401 | ||
| 2404 | len = iov_length(iov, nr_segs); | 2402 | len = iov_iter_count(from); |
| 2405 | if (!len) | ||
| 2406 | return 0; | ||
| 2407 | |||
| 2408 | rc = generic_write_checks(file, poffset, &len, 0); | 2403 | rc = generic_write_checks(file, poffset, &len, 0); |
| 2409 | if (rc) | 2404 | if (rc) |
| 2410 | return rc; | 2405 | return rc; |
| 2411 | 2406 | ||
| 2407 | if (!len) | ||
| 2408 | return 0; | ||
| 2409 | |||
| 2410 | iov_iter_truncate(from, len); | ||
| 2411 | |||
| 2412 | INIT_LIST_HEAD(&wdata_list); | 2412 | INIT_LIST_HEAD(&wdata_list); |
| 2413 | cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); | 2413 | cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); |
| 2414 | open_file = file->private_data; | 2414 | open_file = file->private_data; |
| @@ -2424,7 +2424,6 @@ cifs_iovec_write(struct file *file, const struct iovec *iov, | |||
| 2424 | else | 2424 | else |
| 2425 | pid = current->tgid; | 2425 | pid = current->tgid; |
| 2426 | 2426 | ||
| 2427 | iov_iter_init(&it, iov, nr_segs, len, 0); | ||
| 2428 | do { | 2427 | do { |
| 2429 | size_t save_len; | 2428 | size_t save_len; |
| 2430 | 2429 | ||
| @@ -2444,11 +2443,10 @@ cifs_iovec_write(struct file *file, const struct iovec *iov, | |||
| 2444 | 2443 | ||
| 2445 | save_len = cur_len; | 2444 | save_len = cur_len; |
| 2446 | for (i = 0; i < nr_pages; i++) { | 2445 | for (i = 0; i < nr_pages; i++) { |
| 2447 | bytes = min_t(const size_t, cur_len, PAGE_SIZE); | 2446 | bytes = min_t(size_t, cur_len, PAGE_SIZE); |
| 2448 | copied = iov_iter_copy_from_user(wdata->pages[i], &it, | 2447 | copied = copy_page_from_iter(wdata->pages[i], 0, bytes, |
| 2449 | 0, bytes); | 2448 | from); |
| 2450 | cur_len -= copied; | 2449 | cur_len -= copied; |
| 2451 | iov_iter_advance(&it, copied); | ||
| 2452 | /* | 2450 | /* |
| 2453 | * If we didn't copy as much as we expected, then that | 2451 | * If we didn't copy as much as we expected, then that |
| 2454 | * may mean we trod into an unmapped area. Stop copying | 2452 | * may mean we trod into an unmapped area. Stop copying |
| @@ -2546,11 +2544,11 @@ restart_loop: | |||
| 2546 | return total_written ? total_written : (ssize_t)rc; | 2544 | return total_written ? total_written : (ssize_t)rc; |
| 2547 | } | 2545 | } |
| 2548 | 2546 | ||
| 2549 | ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov, | 2547 | ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from) |
| 2550 | unsigned long nr_segs, loff_t pos) | ||
| 2551 | { | 2548 | { |
| 2552 | ssize_t written; | 2549 | ssize_t written; |
| 2553 | struct inode *inode; | 2550 | struct inode *inode; |
| 2551 | loff_t pos = iocb->ki_pos; | ||
| 2554 | 2552 | ||
| 2555 | inode = file_inode(iocb->ki_filp); | 2553 | inode = file_inode(iocb->ki_filp); |
| 2556 | 2554 | ||
| @@ -2560,7 +2558,7 @@ ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov, | |||
| 2560 | * write request. | 2558 | * write request. |
| 2561 | */ | 2559 | */ |
| 2562 | 2560 | ||
| 2563 | written = cifs_iovec_write(iocb->ki_filp, iov, nr_segs, &pos); | 2561 | written = cifs_iovec_write(iocb->ki_filp, from, &pos); |
| 2564 | if (written > 0) { | 2562 | if (written > 0) { |
| 2565 | set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(inode)->flags); | 2563 | set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(inode)->flags); |
| 2566 | iocb->ki_pos = pos; | 2564 | iocb->ki_pos = pos; |
| @@ -2570,8 +2568,7 @@ ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov, | |||
| 2570 | } | 2568 | } |
| 2571 | 2569 | ||
| 2572 | static ssize_t | 2570 | static ssize_t |
| 2573 | cifs_writev(struct kiocb *iocb, const struct iovec *iov, | 2571 | cifs_writev(struct kiocb *iocb, struct iov_iter *from) |
| 2574 | unsigned long nr_segs, loff_t pos) | ||
| 2575 | { | 2572 | { |
| 2576 | struct file *file = iocb->ki_filp; | 2573 | struct file *file = iocb->ki_filp; |
| 2577 | struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data; | 2574 | struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data; |
| @@ -2589,10 +2586,10 @@ cifs_writev(struct kiocb *iocb, const struct iovec *iov, | |||
| 2589 | mutex_lock(&inode->i_mutex); | 2586 | mutex_lock(&inode->i_mutex); |
| 2590 | if (file->f_flags & O_APPEND) | 2587 | if (file->f_flags & O_APPEND) |
| 2591 | lock_pos = i_size_read(inode); | 2588 | lock_pos = i_size_read(inode); |
| 2592 | if (!cifs_find_lock_conflict(cfile, lock_pos, iov_length(iov, nr_segs), | 2589 | if (!cifs_find_lock_conflict(cfile, lock_pos, iov_iter_count(from), |
| 2593 | server->vals->exclusive_lock_type, NULL, | 2590 | server->vals->exclusive_lock_type, NULL, |
| 2594 | CIFS_WRITE_OP)) { | 2591 | CIFS_WRITE_OP)) { |
| 2595 | rc = __generic_file_aio_write(iocb, iov, nr_segs); | 2592 | rc = __generic_file_write_iter(iocb, from); |
| 2596 | mutex_unlock(&inode->i_mutex); | 2593 | mutex_unlock(&inode->i_mutex); |
| 2597 | 2594 | ||
| 2598 | if (rc > 0) { | 2595 | if (rc > 0) { |
| @@ -2610,8 +2607,7 @@ cifs_writev(struct kiocb *iocb, const struct iovec *iov, | |||
| 2610 | } | 2607 | } |
| 2611 | 2608 | ||
| 2612 | ssize_t | 2609 | ssize_t |
| 2613 | cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov, | 2610 | cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from) |
| 2614 | unsigned long nr_segs, loff_t pos) | ||
| 2615 | { | 2611 | { |
| 2616 | struct inode *inode = file_inode(iocb->ki_filp); | 2612 | struct inode *inode = file_inode(iocb->ki_filp); |
| 2617 | struct cifsInodeInfo *cinode = CIFS_I(inode); | 2613 | struct cifsInodeInfo *cinode = CIFS_I(inode); |
| @@ -2629,11 +2625,10 @@ cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov, | |||
| 2629 | if (cap_unix(tcon->ses) && | 2625 | if (cap_unix(tcon->ses) && |
| 2630 | (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) | 2626 | (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) |
| 2631 | && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) { | 2627 | && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) { |
| 2632 | written = generic_file_aio_write( | 2628 | written = generic_file_write_iter(iocb, from); |
| 2633 | iocb, iov, nr_segs, pos); | ||
| 2634 | goto out; | 2629 | goto out; |
| 2635 | } | 2630 | } |
| 2636 | written = cifs_writev(iocb, iov, nr_segs, pos); | 2631 | written = cifs_writev(iocb, from); |
| 2637 | goto out; | 2632 | goto out; |
| 2638 | } | 2633 | } |
| 2639 | /* | 2634 | /* |
| @@ -2642,7 +2637,7 @@ cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov, | |||
| 2642 | * affected pages because it may cause a error with mandatory locks on | 2637 | * affected pages because it may cause a error with mandatory locks on |
| 2643 | * these pages but not on the region from pos to ppos+len-1. | 2638 | * these pages but not on the region from pos to ppos+len-1. |
| 2644 | */ | 2639 | */ |
| 2645 | written = cifs_user_writev(iocb, iov, nr_segs, pos); | 2640 | written = cifs_user_writev(iocb, from); |
| 2646 | if (written > 0 && CIFS_CACHE_READ(cinode)) { | 2641 | if (written > 0 && CIFS_CACHE_READ(cinode)) { |
| 2647 | /* | 2642 | /* |
| 2648 | * Windows 7 server can delay breaking level2 oplock if a write | 2643 | * Windows 7 server can delay breaking level2 oplock if a write |
| @@ -2831,32 +2826,25 @@ cifs_uncached_read_into_pages(struct TCP_Server_Info *server, | |||
| 2831 | return total_read > 0 ? total_read : result; | 2826 | return total_read > 0 ? total_read : result; |
| 2832 | } | 2827 | } |
| 2833 | 2828 | ||
| 2834 | ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov, | 2829 | ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to) |
| 2835 | unsigned long nr_segs, loff_t pos) | ||
| 2836 | { | 2830 | { |
| 2837 | struct file *file = iocb->ki_filp; | 2831 | struct file *file = iocb->ki_filp; |
| 2838 | ssize_t rc; | 2832 | ssize_t rc; |
| 2839 | size_t len, cur_len; | 2833 | size_t len, cur_len; |
| 2840 | ssize_t total_read = 0; | 2834 | ssize_t total_read = 0; |
| 2841 | loff_t offset = pos; | 2835 | loff_t offset = iocb->ki_pos; |
| 2842 | unsigned int npages; | 2836 | unsigned int npages; |
| 2843 | struct cifs_sb_info *cifs_sb; | 2837 | struct cifs_sb_info *cifs_sb; |
| 2844 | struct cifs_tcon *tcon; | 2838 | struct cifs_tcon *tcon; |
| 2845 | struct cifsFileInfo *open_file; | 2839 | struct cifsFileInfo *open_file; |
| 2846 | struct cifs_readdata *rdata, *tmp; | 2840 | struct cifs_readdata *rdata, *tmp; |
| 2847 | struct list_head rdata_list; | 2841 | struct list_head rdata_list; |
| 2848 | struct iov_iter to; | ||
| 2849 | pid_t pid; | 2842 | pid_t pid; |
| 2850 | 2843 | ||
| 2851 | if (!nr_segs) | 2844 | len = iov_iter_count(to); |
| 2852 | return 0; | ||
| 2853 | |||
| 2854 | len = iov_length(iov, nr_segs); | ||
| 2855 | if (!len) | 2845 | if (!len) |
| 2856 | return 0; | 2846 | return 0; |
| 2857 | 2847 | ||
| 2858 | iov_iter_init(&to, iov, nr_segs, len, 0); | ||
| 2859 | |||
| 2860 | INIT_LIST_HEAD(&rdata_list); | 2848 | INIT_LIST_HEAD(&rdata_list); |
| 2861 | cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); | 2849 | cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); |
| 2862 | open_file = file->private_data; | 2850 | open_file = file->private_data; |
| @@ -2914,7 +2902,7 @@ error: | |||
| 2914 | if (!list_empty(&rdata_list)) | 2902 | if (!list_empty(&rdata_list)) |
| 2915 | rc = 0; | 2903 | rc = 0; |
| 2916 | 2904 | ||
| 2917 | len = iov_iter_count(&to); | 2905 | len = iov_iter_count(to); |
| 2918 | /* the loop below should proceed in the order of increasing offsets */ | 2906 | /* the loop below should proceed in the order of increasing offsets */ |
| 2919 | list_for_each_entry_safe(rdata, tmp, &rdata_list, list) { | 2907 | list_for_each_entry_safe(rdata, tmp, &rdata_list, list) { |
| 2920 | again: | 2908 | again: |
| @@ -2931,7 +2919,7 @@ error: | |||
| 2931 | goto again; | 2919 | goto again; |
| 2932 | } | 2920 | } |
| 2933 | } else { | 2921 | } else { |
| 2934 | rc = cifs_readdata_to_iov(rdata, &to); | 2922 | rc = cifs_readdata_to_iov(rdata, to); |
| 2935 | } | 2923 | } |
| 2936 | 2924 | ||
| 2937 | } | 2925 | } |
| @@ -2939,7 +2927,7 @@ error: | |||
| 2939 | kref_put(&rdata->refcount, cifs_uncached_readdata_release); | 2927 | kref_put(&rdata->refcount, cifs_uncached_readdata_release); |
| 2940 | } | 2928 | } |
| 2941 | 2929 | ||
| 2942 | total_read = len - iov_iter_count(&to); | 2930 | total_read = len - iov_iter_count(to); |
| 2943 | 2931 | ||
| 2944 | cifs_stats_bytes_read(tcon, total_read); | 2932 | cifs_stats_bytes_read(tcon, total_read); |
| 2945 | 2933 | ||
| @@ -2948,15 +2936,14 @@ error: | |||
| 2948 | rc = 0; | 2936 | rc = 0; |
| 2949 | 2937 | ||
| 2950 | if (total_read) { | 2938 | if (total_read) { |
| 2951 | iocb->ki_pos = pos + total_read; | 2939 | iocb->ki_pos += total_read; |
| 2952 | return total_read; | 2940 | return total_read; |
| 2953 | } | 2941 | } |
| 2954 | return rc; | 2942 | return rc; |
| 2955 | } | 2943 | } |
| 2956 | 2944 | ||
| 2957 | ssize_t | 2945 | ssize_t |
| 2958 | cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov, | 2946 | cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to) |
| 2959 | unsigned long nr_segs, loff_t pos) | ||
| 2960 | { | 2947 | { |
| 2961 | struct inode *inode = file_inode(iocb->ki_filp); | 2948 | struct inode *inode = file_inode(iocb->ki_filp); |
| 2962 | struct cifsInodeInfo *cinode = CIFS_I(inode); | 2949 | struct cifsInodeInfo *cinode = CIFS_I(inode); |
| @@ -2975,22 +2962,22 @@ cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov, | |||
| 2975 | * pos+len-1. | 2962 | * pos+len-1. |
| 2976 | */ | 2963 | */ |
| 2977 | if (!CIFS_CACHE_READ(cinode)) | 2964 | if (!CIFS_CACHE_READ(cinode)) |
| 2978 | return cifs_user_readv(iocb, iov, nr_segs, pos); | 2965 | return cifs_user_readv(iocb, to); |
| 2979 | 2966 | ||
| 2980 | if (cap_unix(tcon->ses) && | 2967 | if (cap_unix(tcon->ses) && |
| 2981 | (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && | 2968 | (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && |
| 2982 | ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) | 2969 | ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) |
| 2983 | return generic_file_aio_read(iocb, iov, nr_segs, pos); | 2970 | return generic_file_read_iter(iocb, to); |
| 2984 | 2971 | ||
| 2985 | /* | 2972 | /* |
| 2986 | * We need to hold the sem to be sure nobody modifies lock list | 2973 | * We need to hold the sem to be sure nobody modifies lock list |
| 2987 | * with a brlock that prevents reading. | 2974 | * with a brlock that prevents reading. |
| 2988 | */ | 2975 | */ |
| 2989 | down_read(&cinode->lock_sem); | 2976 | down_read(&cinode->lock_sem); |
| 2990 | if (!cifs_find_lock_conflict(cfile, pos, iov_length(iov, nr_segs), | 2977 | if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to), |
| 2991 | tcon->ses->server->vals->shared_lock_type, | 2978 | tcon->ses->server->vals->shared_lock_type, |
| 2992 | NULL, CIFS_READ_OP)) | 2979 | NULL, CIFS_READ_OP)) |
| 2993 | rc = generic_file_aio_read(iocb, iov, nr_segs, pos); | 2980 | rc = generic_file_read_iter(iocb, to); |
| 2994 | up_read(&cinode->lock_sem); | 2981 | up_read(&cinode->lock_sem); |
| 2995 | return rc; | 2982 | return rc; |
| 2996 | } | 2983 | } |
| @@ -3703,8 +3690,8 @@ void cifs_oplock_break(struct work_struct *work) | |||
| 3703 | * Direct IO is not yet supported in the cached mode. | 3690 | * Direct IO is not yet supported in the cached mode. |
| 3704 | */ | 3691 | */ |
| 3705 | static ssize_t | 3692 | static ssize_t |
| 3706 | cifs_direct_io(int rw, struct kiocb *iocb, const struct iovec *iov, | 3693 | cifs_direct_io(int rw, struct kiocb *iocb, struct iov_iter *iter, |
| 3707 | loff_t pos, unsigned long nr_segs) | 3694 | loff_t pos) |
| 3708 | { | 3695 | { |
| 3709 | /* | 3696 | /* |
| 3710 | * FIXME | 3697 | * FIXME |
diff --git a/fs/cifs/link.c b/fs/cifs/link.c index 264ece71bdb2..68559fd557fb 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c | |||
| @@ -374,7 +374,7 @@ cifs_create_mf_symlink(unsigned int xid, struct cifs_tcon *tcon, | |||
| 374 | oparms.cifs_sb = cifs_sb; | 374 | oparms.cifs_sb = cifs_sb; |
| 375 | oparms.desired_access = GENERIC_WRITE; | 375 | oparms.desired_access = GENERIC_WRITE; |
| 376 | oparms.create_options = create_options; | 376 | oparms.create_options = create_options; |
| 377 | oparms.disposition = FILE_OPEN; | 377 | oparms.disposition = FILE_CREATE; |
| 378 | oparms.path = path; | 378 | oparms.path = path; |
| 379 | oparms.fid = &fid; | 379 | oparms.fid = &fid; |
| 380 | oparms.reconnect = false; | 380 | oparms.reconnect = false; |
diff --git a/fs/dcache.c b/fs/dcache.c index 1792d6075b4f..06f65857a855 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
| @@ -532,10 +532,12 @@ static inline struct dentry *lock_parent(struct dentry *dentry) | |||
| 532 | struct dentry *parent = dentry->d_parent; | 532 | struct dentry *parent = dentry->d_parent; |
| 533 | if (IS_ROOT(dentry)) | 533 | if (IS_ROOT(dentry)) |
| 534 | return NULL; | 534 | return NULL; |
| 535 | if (unlikely((int)dentry->d_lockref.count < 0)) | ||
| 536 | return NULL; | ||
| 535 | if (likely(spin_trylock(&parent->d_lock))) | 537 | if (likely(spin_trylock(&parent->d_lock))) |
| 536 | return parent; | 538 | return parent; |
| 537 | spin_unlock(&dentry->d_lock); | ||
| 538 | rcu_read_lock(); | 539 | rcu_read_lock(); |
| 540 | spin_unlock(&dentry->d_lock); | ||
| 539 | again: | 541 | again: |
| 540 | parent = ACCESS_ONCE(dentry->d_parent); | 542 | parent = ACCESS_ONCE(dentry->d_parent); |
| 541 | spin_lock(&parent->d_lock); | 543 | spin_lock(&parent->d_lock); |
diff --git a/fs/direct-io.c b/fs/direct-io.c index 31ba0935e32e..98040ba388ac 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c | |||
| @@ -77,7 +77,6 @@ struct dio_submit { | |||
| 77 | unsigned blocks_available; /* At block_in_file. changes */ | 77 | unsigned blocks_available; /* At block_in_file. changes */ |
| 78 | int reap_counter; /* rate limit reaping */ | 78 | int reap_counter; /* rate limit reaping */ |
| 79 | sector_t final_block_in_request;/* doesn't change */ | 79 | sector_t final_block_in_request;/* doesn't change */ |
| 80 | unsigned first_block_in_page; /* doesn't change, Used only once */ | ||
| 81 | int boundary; /* prev block is at a boundary */ | 80 | int boundary; /* prev block is at a boundary */ |
| 82 | get_block_t *get_block; /* block mapping function */ | 81 | get_block_t *get_block; /* block mapping function */ |
| 83 | dio_submit_t *submit_io; /* IO submition function */ | 82 | dio_submit_t *submit_io; /* IO submition function */ |
| @@ -98,19 +97,14 @@ struct dio_submit { | |||
| 98 | sector_t cur_page_block; /* Where it starts */ | 97 | sector_t cur_page_block; /* Where it starts */ |
| 99 | loff_t cur_page_fs_offset; /* Offset in file */ | 98 | loff_t cur_page_fs_offset; /* Offset in file */ |
| 100 | 99 | ||
| 101 | /* | 100 | struct iov_iter *iter; |
| 102 | * Page fetching state. These variables belong to dio_refill_pages(). | ||
| 103 | */ | ||
| 104 | int curr_page; /* changes */ | ||
| 105 | int total_pages; /* doesn't change */ | ||
| 106 | unsigned long curr_user_address;/* changes */ | ||
| 107 | |||
| 108 | /* | 101 | /* |
| 109 | * Page queue. These variables belong to dio_refill_pages() and | 102 | * Page queue. These variables belong to dio_refill_pages() and |
| 110 | * dio_get_page(). | 103 | * dio_get_page(). |
| 111 | */ | 104 | */ |
| 112 | unsigned head; /* next page to process */ | 105 | unsigned head; /* next page to process */ |
| 113 | unsigned tail; /* last valid page + 1 */ | 106 | unsigned tail; /* last valid page + 1 */ |
| 107 | size_t from, to; | ||
| 114 | }; | 108 | }; |
| 115 | 109 | ||
| 116 | /* dio_state communicated between submission path and end_io */ | 110 | /* dio_state communicated between submission path and end_io */ |
| @@ -163,15 +157,10 @@ static inline unsigned dio_pages_present(struct dio_submit *sdio) | |||
| 163 | */ | 157 | */ |
| 164 | static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio) | 158 | static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio) |
| 165 | { | 159 | { |
| 166 | int ret; | 160 | ssize_t ret; |
| 167 | int nr_pages; | ||
| 168 | 161 | ||
| 169 | nr_pages = min(sdio->total_pages - sdio->curr_page, DIO_PAGES); | 162 | ret = iov_iter_get_pages(sdio->iter, dio->pages, DIO_PAGES * PAGE_SIZE, |
| 170 | ret = get_user_pages_fast( | 163 | &sdio->from); |
| 171 | sdio->curr_user_address, /* Where from? */ | ||
| 172 | nr_pages, /* How many pages? */ | ||
| 173 | dio->rw == READ, /* Write to memory? */ | ||
| 174 | &dio->pages[0]); /* Put results here */ | ||
| 175 | 164 | ||
| 176 | if (ret < 0 && sdio->blocks_available && (dio->rw & WRITE)) { | 165 | if (ret < 0 && sdio->blocks_available && (dio->rw & WRITE)) { |
| 177 | struct page *page = ZERO_PAGE(0); | 166 | struct page *page = ZERO_PAGE(0); |
| @@ -186,18 +175,19 @@ static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio) | |||
| 186 | dio->pages[0] = page; | 175 | dio->pages[0] = page; |
| 187 | sdio->head = 0; | 176 | sdio->head = 0; |
| 188 | sdio->tail = 1; | 177 | sdio->tail = 1; |
| 189 | ret = 0; | 178 | sdio->from = 0; |
| 190 | goto out; | 179 | sdio->to = PAGE_SIZE; |
| 180 | return 0; | ||
| 191 | } | 181 | } |
| 192 | 182 | ||
| 193 | if (ret >= 0) { | 183 | if (ret >= 0) { |
| 194 | sdio->curr_user_address += ret * PAGE_SIZE; | 184 | iov_iter_advance(sdio->iter, ret); |
| 195 | sdio->curr_page += ret; | 185 | ret += sdio->from; |
| 196 | sdio->head = 0; | 186 | sdio->head = 0; |
| 197 | sdio->tail = ret; | 187 | sdio->tail = (ret + PAGE_SIZE - 1) / PAGE_SIZE; |
| 198 | ret = 0; | 188 | sdio->to = ((ret - 1) & (PAGE_SIZE - 1)) + 1; |
| 189 | return 0; | ||
| 199 | } | 190 | } |
| 200 | out: | ||
| 201 | return ret; | 191 | return ret; |
| 202 | } | 192 | } |
| 203 | 193 | ||
| @@ -208,8 +198,9 @@ out: | |||
| 208 | * L1 cache. | 198 | * L1 cache. |
| 209 | */ | 199 | */ |
| 210 | static inline struct page *dio_get_page(struct dio *dio, | 200 | static inline struct page *dio_get_page(struct dio *dio, |
| 211 | struct dio_submit *sdio) | 201 | struct dio_submit *sdio, size_t *from, size_t *to) |
| 212 | { | 202 | { |
| 203 | int n; | ||
| 213 | if (dio_pages_present(sdio) == 0) { | 204 | if (dio_pages_present(sdio) == 0) { |
| 214 | int ret; | 205 | int ret; |
| 215 | 206 | ||
| @@ -218,7 +209,10 @@ static inline struct page *dio_get_page(struct dio *dio, | |||
| 218 | return ERR_PTR(ret); | 209 | return ERR_PTR(ret); |
| 219 | BUG_ON(dio_pages_present(sdio) == 0); | 210 | BUG_ON(dio_pages_present(sdio) == 0); |
| 220 | } | 211 | } |
| 221 | return dio->pages[sdio->head++]; | 212 | n = sdio->head++; |
| 213 | *from = n ? 0 : sdio->from; | ||
| 214 | *to = (n == sdio->tail - 1) ? sdio->to : PAGE_SIZE; | ||
| 215 | return dio->pages[n]; | ||
| 222 | } | 216 | } |
| 223 | 217 | ||
| 224 | /** | 218 | /** |
| @@ -422,8 +416,8 @@ static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio) | |||
| 422 | */ | 416 | */ |
| 423 | static inline void dio_cleanup(struct dio *dio, struct dio_submit *sdio) | 417 | static inline void dio_cleanup(struct dio *dio, struct dio_submit *sdio) |
| 424 | { | 418 | { |
| 425 | while (dio_pages_present(sdio)) | 419 | while (sdio->head < sdio->tail) |
| 426 | page_cache_release(dio_get_page(dio, sdio)); | 420 | page_cache_release(dio->pages[sdio->head++]); |
| 427 | } | 421 | } |
| 428 | 422 | ||
| 429 | /* | 423 | /* |
| @@ -912,23 +906,18 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio, | |||
| 912 | struct buffer_head *map_bh) | 906 | struct buffer_head *map_bh) |
| 913 | { | 907 | { |
| 914 | const unsigned blkbits = sdio->blkbits; | 908 | const unsigned blkbits = sdio->blkbits; |
| 915 | const unsigned blocks_per_page = PAGE_SIZE >> blkbits; | ||
| 916 | struct page *page; | ||
| 917 | unsigned block_in_page; | ||
| 918 | int ret = 0; | 909 | int ret = 0; |
| 919 | 910 | ||
| 920 | /* The I/O can start at any block offset within the first page */ | ||
| 921 | block_in_page = sdio->first_block_in_page; | ||
| 922 | |||
| 923 | while (sdio->block_in_file < sdio->final_block_in_request) { | 911 | while (sdio->block_in_file < sdio->final_block_in_request) { |
| 924 | page = dio_get_page(dio, sdio); | 912 | struct page *page; |
| 913 | size_t from, to; | ||
| 914 | page = dio_get_page(dio, sdio, &from, &to); | ||
| 925 | if (IS_ERR(page)) { | 915 | if (IS_ERR(page)) { |
| 926 | ret = PTR_ERR(page); | 916 | ret = PTR_ERR(page); |
| 927 | goto out; | 917 | goto out; |
| 928 | } | 918 | } |
| 929 | 919 | ||
| 930 | while (block_in_page < blocks_per_page) { | 920 | while (from < to) { |
| 931 | unsigned offset_in_page = block_in_page << blkbits; | ||
| 932 | unsigned this_chunk_bytes; /* # of bytes mapped */ | 921 | unsigned this_chunk_bytes; /* # of bytes mapped */ |
| 933 | unsigned this_chunk_blocks; /* # of blocks */ | 922 | unsigned this_chunk_blocks; /* # of blocks */ |
| 934 | unsigned u; | 923 | unsigned u; |
| @@ -999,10 +988,10 @@ do_holes: | |||
| 999 | page_cache_release(page); | 988 | page_cache_release(page); |
| 1000 | goto out; | 989 | goto out; |
| 1001 | } | 990 | } |
| 1002 | zero_user(page, block_in_page << blkbits, | 991 | zero_user(page, from, 1 << blkbits); |
| 1003 | 1 << blkbits); | ||
| 1004 | sdio->block_in_file++; | 992 | sdio->block_in_file++; |
| 1005 | block_in_page++; | 993 | from += 1 << blkbits; |
| 994 | dio->result += 1 << blkbits; | ||
| 1006 | goto next_block; | 995 | goto next_block; |
| 1007 | } | 996 | } |
| 1008 | 997 | ||
| @@ -1019,7 +1008,7 @@ do_holes: | |||
| 1019 | * can add to this page | 1008 | * can add to this page |
| 1020 | */ | 1009 | */ |
| 1021 | this_chunk_blocks = sdio->blocks_available; | 1010 | this_chunk_blocks = sdio->blocks_available; |
| 1022 | u = (PAGE_SIZE - offset_in_page) >> blkbits; | 1011 | u = (to - from) >> blkbits; |
| 1023 | if (this_chunk_blocks > u) | 1012 | if (this_chunk_blocks > u) |
| 1024 | this_chunk_blocks = u; | 1013 | this_chunk_blocks = u; |
| 1025 | u = sdio->final_block_in_request - sdio->block_in_file; | 1014 | u = sdio->final_block_in_request - sdio->block_in_file; |
| @@ -1031,7 +1020,7 @@ do_holes: | |||
| 1031 | if (this_chunk_blocks == sdio->blocks_available) | 1020 | if (this_chunk_blocks == sdio->blocks_available) |
| 1032 | sdio->boundary = buffer_boundary(map_bh); | 1021 | sdio->boundary = buffer_boundary(map_bh); |
| 1033 | ret = submit_page_section(dio, sdio, page, | 1022 | ret = submit_page_section(dio, sdio, page, |
| 1034 | offset_in_page, | 1023 | from, |
| 1035 | this_chunk_bytes, | 1024 | this_chunk_bytes, |
| 1036 | sdio->next_block_for_io, | 1025 | sdio->next_block_for_io, |
| 1037 | map_bh); | 1026 | map_bh); |
| @@ -1042,7 +1031,8 @@ do_holes: | |||
| 1042 | sdio->next_block_for_io += this_chunk_blocks; | 1031 | sdio->next_block_for_io += this_chunk_blocks; |
| 1043 | 1032 | ||
| 1044 | sdio->block_in_file += this_chunk_blocks; | 1033 | sdio->block_in_file += this_chunk_blocks; |
| 1045 | block_in_page += this_chunk_blocks; | 1034 | from += this_chunk_bytes; |
| 1035 | dio->result += this_chunk_bytes; | ||
| 1046 | sdio->blocks_available -= this_chunk_blocks; | 1036 | sdio->blocks_available -= this_chunk_blocks; |
| 1047 | next_block: | 1037 | next_block: |
| 1048 | BUG_ON(sdio->block_in_file > sdio->final_block_in_request); | 1038 | BUG_ON(sdio->block_in_file > sdio->final_block_in_request); |
| @@ -1052,7 +1042,6 @@ next_block: | |||
| 1052 | 1042 | ||
| 1053 | /* Drop the ref which was taken in get_user_pages() */ | 1043 | /* Drop the ref which was taken in get_user_pages() */ |
| 1054 | page_cache_release(page); | 1044 | page_cache_release(page); |
| 1055 | block_in_page = 0; | ||
| 1056 | } | 1045 | } |
| 1057 | out: | 1046 | out: |
| 1058 | return ret; | 1047 | return ret; |
| @@ -1107,24 +1096,20 @@ static inline int drop_refcount(struct dio *dio) | |||
| 1107 | */ | 1096 | */ |
| 1108 | static inline ssize_t | 1097 | static inline ssize_t |
| 1109 | do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, | 1098 | do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, |
| 1110 | struct block_device *bdev, const struct iovec *iov, loff_t offset, | 1099 | struct block_device *bdev, struct iov_iter *iter, loff_t offset, |
| 1111 | unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, | 1100 | get_block_t get_block, dio_iodone_t end_io, |
| 1112 | dio_submit_t submit_io, int flags) | 1101 | dio_submit_t submit_io, int flags) |
| 1113 | { | 1102 | { |
| 1114 | int seg; | ||
| 1115 | size_t size; | ||
| 1116 | unsigned long addr; | ||
| 1117 | unsigned i_blkbits = ACCESS_ONCE(inode->i_blkbits); | 1103 | unsigned i_blkbits = ACCESS_ONCE(inode->i_blkbits); |
| 1118 | unsigned blkbits = i_blkbits; | 1104 | unsigned blkbits = i_blkbits; |
| 1119 | unsigned blocksize_mask = (1 << blkbits) - 1; | 1105 | unsigned blocksize_mask = (1 << blkbits) - 1; |
| 1120 | ssize_t retval = -EINVAL; | 1106 | ssize_t retval = -EINVAL; |
| 1121 | loff_t end = offset; | 1107 | loff_t end = offset + iov_iter_count(iter); |
| 1122 | struct dio *dio; | 1108 | struct dio *dio; |
| 1123 | struct dio_submit sdio = { 0, }; | 1109 | struct dio_submit sdio = { 0, }; |
| 1124 | unsigned long user_addr; | ||
| 1125 | size_t bytes; | ||
| 1126 | struct buffer_head map_bh = { 0, }; | 1110 | struct buffer_head map_bh = { 0, }; |
| 1127 | struct blk_plug plug; | 1111 | struct blk_plug plug; |
| 1112 | unsigned long align = offset | iov_iter_alignment(iter); | ||
| 1128 | 1113 | ||
| 1129 | if (rw & WRITE) | 1114 | if (rw & WRITE) |
| 1130 | rw = WRITE_ODIRECT; | 1115 | rw = WRITE_ODIRECT; |
| @@ -1134,32 +1119,16 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, | |||
| 1134 | * the early prefetch in the caller enough time. | 1119 | * the early prefetch in the caller enough time. |
| 1135 | */ | 1120 | */ |
| 1136 | 1121 | ||
| 1137 | if (offset & blocksize_mask) { | 1122 | if (align & blocksize_mask) { |
| 1138 | if (bdev) | 1123 | if (bdev) |
| 1139 | blkbits = blksize_bits(bdev_logical_block_size(bdev)); | 1124 | blkbits = blksize_bits(bdev_logical_block_size(bdev)); |
| 1140 | blocksize_mask = (1 << blkbits) - 1; | 1125 | blocksize_mask = (1 << blkbits) - 1; |
| 1141 | if (offset & blocksize_mask) | 1126 | if (align & blocksize_mask) |
| 1142 | goto out; | 1127 | goto out; |
| 1143 | } | 1128 | } |
| 1144 | 1129 | ||
| 1145 | /* Check the memory alignment. Blocks cannot straddle pages */ | ||
| 1146 | for (seg = 0; seg < nr_segs; seg++) { | ||
| 1147 | addr = (unsigned long)iov[seg].iov_base; | ||
| 1148 | size = iov[seg].iov_len; | ||
| 1149 | end += size; | ||
| 1150 | if (unlikely((addr & blocksize_mask) || | ||
| 1151 | (size & blocksize_mask))) { | ||
| 1152 | if (bdev) | ||
| 1153 | blkbits = blksize_bits( | ||
| 1154 | bdev_logical_block_size(bdev)); | ||
| 1155 | blocksize_mask = (1 << blkbits) - 1; | ||
| 1156 | if ((addr & blocksize_mask) || (size & blocksize_mask)) | ||
| 1157 | goto out; | ||
| 1158 | } | ||
| 1159 | } | ||
| 1160 | |||
| 1161 | /* watch out for a 0 len io from a tricksy fs */ | 1130 | /* watch out for a 0 len io from a tricksy fs */ |
| 1162 | if (rw == READ && end == offset) | 1131 | if (rw == READ && !iov_iter_count(iter)) |
| 1163 | return 0; | 1132 | return 0; |
| 1164 | 1133 | ||
| 1165 | dio = kmem_cache_alloc(dio_cache, GFP_KERNEL); | 1134 | dio = kmem_cache_alloc(dio_cache, GFP_KERNEL); |
| @@ -1249,6 +1218,10 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, | |||
| 1249 | spin_lock_init(&dio->bio_lock); | 1218 | spin_lock_init(&dio->bio_lock); |
| 1250 | dio->refcount = 1; | 1219 | dio->refcount = 1; |
| 1251 | 1220 | ||
| 1221 | sdio.iter = iter; | ||
| 1222 | sdio.final_block_in_request = | ||
| 1223 | (offset + iov_iter_count(iter)) >> blkbits; | ||
| 1224 | |||
| 1252 | /* | 1225 | /* |
| 1253 | * In case of non-aligned buffers, we may need 2 more | 1226 | * In case of non-aligned buffers, we may need 2 more |
| 1254 | * pages since we need to zero out first and last block. | 1227 | * pages since we need to zero out first and last block. |
| @@ -1256,47 +1229,13 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, | |||
| 1256 | if (unlikely(sdio.blkfactor)) | 1229 | if (unlikely(sdio.blkfactor)) |
| 1257 | sdio.pages_in_io = 2; | 1230 | sdio.pages_in_io = 2; |
| 1258 | 1231 | ||
| 1259 | for (seg = 0; seg < nr_segs; seg++) { | 1232 | sdio.pages_in_io += iov_iter_npages(iter, INT_MAX); |
| 1260 | user_addr = (unsigned long)iov[seg].iov_base; | ||
| 1261 | sdio.pages_in_io += | ||
| 1262 | ((user_addr + iov[seg].iov_len + PAGE_SIZE-1) / | ||
| 1263 | PAGE_SIZE - user_addr / PAGE_SIZE); | ||
| 1264 | } | ||
| 1265 | 1233 | ||
| 1266 | blk_start_plug(&plug); | 1234 | blk_start_plug(&plug); |
| 1267 | 1235 | ||
| 1268 | for (seg = 0; seg < nr_segs; seg++) { | 1236 | retval = do_direct_IO(dio, &sdio, &map_bh); |
| 1269 | user_addr = (unsigned long)iov[seg].iov_base; | 1237 | if (retval) |
| 1270 | sdio.size += bytes = iov[seg].iov_len; | 1238 | dio_cleanup(dio, &sdio); |
| 1271 | |||
| 1272 | /* Index into the first page of the first block */ | ||
| 1273 | sdio.first_block_in_page = (user_addr & ~PAGE_MASK) >> blkbits; | ||
| 1274 | sdio.final_block_in_request = sdio.block_in_file + | ||
| 1275 | (bytes >> blkbits); | ||
| 1276 | /* Page fetching state */ | ||
| 1277 | sdio.head = 0; | ||
| 1278 | sdio.tail = 0; | ||
| 1279 | sdio.curr_page = 0; | ||
| 1280 | |||
| 1281 | sdio.total_pages = 0; | ||
| 1282 | if (user_addr & (PAGE_SIZE-1)) { | ||
| 1283 | sdio.total_pages++; | ||
| 1284 | bytes -= PAGE_SIZE - (user_addr & (PAGE_SIZE - 1)); | ||
| 1285 | } | ||
| 1286 | sdio.total_pages += (bytes + PAGE_SIZE - 1) / PAGE_SIZE; | ||
| 1287 | sdio.curr_user_address = user_addr; | ||
| 1288 | |||
| 1289 | retval = do_direct_IO(dio, &sdio, &map_bh); | ||
| 1290 | |||
| 1291 | dio->result += iov[seg].iov_len - | ||
| 1292 | ((sdio.final_block_in_request - sdio.block_in_file) << | ||
| 1293 | blkbits); | ||
| 1294 | |||
| 1295 | if (retval) { | ||
| 1296 | dio_cleanup(dio, &sdio); | ||
| 1297 | break; | ||
| 1298 | } | ||
| 1299 | } /* end iovec loop */ | ||
| 1300 | 1239 | ||
| 1301 | if (retval == -ENOTBLK) { | 1240 | if (retval == -ENOTBLK) { |
| 1302 | /* | 1241 | /* |
| @@ -1365,8 +1304,8 @@ out: | |||
| 1365 | 1304 | ||
| 1366 | ssize_t | 1305 | ssize_t |
| 1367 | __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, | 1306 | __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, |
| 1368 | struct block_device *bdev, const struct iovec *iov, loff_t offset, | 1307 | struct block_device *bdev, struct iov_iter *iter, loff_t offset, |
| 1369 | unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, | 1308 | get_block_t get_block, dio_iodone_t end_io, |
| 1370 | dio_submit_t submit_io, int flags) | 1309 | dio_submit_t submit_io, int flags) |
| 1371 | { | 1310 | { |
| 1372 | /* | 1311 | /* |
| @@ -1381,9 +1320,8 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, | |||
| 1381 | prefetch(bdev->bd_queue); | 1320 | prefetch(bdev->bd_queue); |
| 1382 | prefetch((char *)bdev->bd_queue + SMP_CACHE_BYTES); | 1321 | prefetch((char *)bdev->bd_queue + SMP_CACHE_BYTES); |
| 1383 | 1322 | ||
| 1384 | return do_blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, | 1323 | return do_blockdev_direct_IO(rw, iocb, inode, bdev, iter, offset, |
| 1385 | nr_segs, get_block, end_io, | 1324 | get_block, end_io, submit_io, flags); |
| 1386 | submit_io, flags); | ||
| 1387 | } | 1325 | } |
| 1388 | 1326 | ||
| 1389 | EXPORT_SYMBOL(__blockdev_direct_IO); | 1327 | EXPORT_SYMBOL(__blockdev_direct_IO); |
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 1e5b45359509..d08e079ea5d3 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c | |||
| @@ -617,6 +617,11 @@ static void retry_failed_sctp_send(struct connection *recv_con, | |||
| 617 | int nodeid = sn_send_failed->ssf_info.sinfo_ppid; | 617 | int nodeid = sn_send_failed->ssf_info.sinfo_ppid; |
| 618 | 618 | ||
| 619 | log_print("Retry sending %d bytes to node id %d", len, nodeid); | 619 | log_print("Retry sending %d bytes to node id %d", len, nodeid); |
| 620 | |||
| 621 | if (!nodeid) { | ||
| 622 | log_print("Shouldn't resend data via listening connection."); | ||
| 623 | return; | ||
| 624 | } | ||
| 620 | 625 | ||
| 621 | con = nodeid2con(nodeid, 0); | 626 | con = nodeid2con(nodeid, 0); |
| 622 | if (!con) { | 627 | if (!con) { |
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index b1eaa7a1f82c..db0fad3269c0 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c | |||
| @@ -45,14 +45,13 @@ | |||
| 45 | * The function to be used for directory reads is ecryptfs_read. | 45 | * The function to be used for directory reads is ecryptfs_read. |
| 46 | */ | 46 | */ |
| 47 | static ssize_t ecryptfs_read_update_atime(struct kiocb *iocb, | 47 | static ssize_t ecryptfs_read_update_atime(struct kiocb *iocb, |
| 48 | const struct iovec *iov, | 48 | struct iov_iter *to) |
| 49 | unsigned long nr_segs, loff_t pos) | ||
| 50 | { | 49 | { |
| 51 | ssize_t rc; | 50 | ssize_t rc; |
| 52 | struct path *path; | 51 | struct path *path; |
| 53 | struct file *file = iocb->ki_filp; | 52 | struct file *file = iocb->ki_filp; |
| 54 | 53 | ||
| 55 | rc = generic_file_aio_read(iocb, iov, nr_segs, pos); | 54 | rc = generic_file_read_iter(iocb, to); |
| 56 | /* | 55 | /* |
| 57 | * Even though this is a async interface, we need to wait | 56 | * Even though this is a async interface, we need to wait |
| 58 | * for IO to finish to update atime | 57 | * for IO to finish to update atime |
| @@ -352,10 +351,10 @@ const struct file_operations ecryptfs_dir_fops = { | |||
| 352 | 351 | ||
| 353 | const struct file_operations ecryptfs_main_fops = { | 352 | const struct file_operations ecryptfs_main_fops = { |
| 354 | .llseek = generic_file_llseek, | 353 | .llseek = generic_file_llseek, |
| 355 | .read = do_sync_read, | 354 | .read = new_sync_read, |
| 356 | .aio_read = ecryptfs_read_update_atime, | 355 | .read_iter = ecryptfs_read_update_atime, |
| 357 | .write = do_sync_write, | 356 | .write = new_sync_write, |
| 358 | .aio_write = generic_file_aio_write, | 357 | .write_iter = generic_file_write_iter, |
| 359 | .iterate = ecryptfs_readdir, | 358 | .iterate = ecryptfs_readdir, |
| 360 | .unlocked_ioctl = ecryptfs_unlocked_ioctl, | 359 | .unlocked_ioctl = ecryptfs_unlocked_ioctl, |
| 361 | #ifdef CONFIG_COMPAT | 360 | #ifdef CONFIG_COMPAT |
diff --git a/fs/eventpoll.c b/fs/eventpoll.c index b73e0621ce9e..b10b48c2a7af 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c | |||
| @@ -910,7 +910,7 @@ static const struct file_operations eventpoll_fops = { | |||
| 910 | void eventpoll_release_file(struct file *file) | 910 | void eventpoll_release_file(struct file *file) |
| 911 | { | 911 | { |
| 912 | struct eventpoll *ep; | 912 | struct eventpoll *ep; |
| 913 | struct epitem *epi; | 913 | struct epitem *epi, *next; |
| 914 | 914 | ||
| 915 | /* | 915 | /* |
| 916 | * We don't want to get "file->f_lock" because it is not | 916 | * We don't want to get "file->f_lock" because it is not |
| @@ -926,7 +926,7 @@ void eventpoll_release_file(struct file *file) | |||
| 926 | * Besides, ep_remove() acquires the lock, so we can't hold it here. | 926 | * Besides, ep_remove() acquires the lock, so we can't hold it here. |
| 927 | */ | 927 | */ |
| 928 | mutex_lock(&epmutex); | 928 | mutex_lock(&epmutex); |
| 929 | list_for_each_entry_rcu(epi, &file->f_ep_links, fllink) { | 929 | list_for_each_entry_safe(epi, next, &file->f_ep_links, fllink) { |
| 930 | ep = epi->ep; | 930 | ep = epi->ep; |
| 931 | mutex_lock_nested(&ep->mtx, 0); | 931 | mutex_lock_nested(&ep->mtx, 0); |
| 932 | ep_remove(ep, epi); | 932 | ep_remove(ep, epi); |
| @@ -1046,13 +1046,13 @@ EXPORT_SYMBOL_GPL(get_task_comm); | |||
| 1046 | * so that a new one can be started | 1046 | * so that a new one can be started |
| 1047 | */ | 1047 | */ |
| 1048 | 1048 | ||
| 1049 | void set_task_comm(struct task_struct *tsk, const char *buf) | 1049 | void __set_task_comm(struct task_struct *tsk, const char *buf, bool exec) |
| 1050 | { | 1050 | { |
| 1051 | task_lock(tsk); | 1051 | task_lock(tsk); |
| 1052 | trace_task_rename(tsk, buf); | 1052 | trace_task_rename(tsk, buf); |
| 1053 | strlcpy(tsk->comm, buf, sizeof(tsk->comm)); | 1053 | strlcpy(tsk->comm, buf, sizeof(tsk->comm)); |
| 1054 | task_unlock(tsk); | 1054 | task_unlock(tsk); |
| 1055 | perf_event_comm(tsk); | 1055 | perf_event_comm(tsk, exec); |
| 1056 | } | 1056 | } |
| 1057 | 1057 | ||
| 1058 | int flush_old_exec(struct linux_binprm * bprm) | 1058 | int flush_old_exec(struct linux_binprm * bprm) |
| @@ -1110,7 +1110,8 @@ void setup_new_exec(struct linux_binprm * bprm) | |||
| 1110 | else | 1110 | else |
| 1111 | set_dumpable(current->mm, suid_dumpable); | 1111 | set_dumpable(current->mm, suid_dumpable); |
| 1112 | 1112 | ||
| 1113 | set_task_comm(current, kbasename(bprm->filename)); | 1113 | perf_event_exec(); |
| 1114 | __set_task_comm(current, kbasename(bprm->filename), true); | ||
| 1114 | 1115 | ||
| 1115 | /* Set the new mm task size. We have to do that late because it may | 1116 | /* Set the new mm task size. We have to do that late because it may |
| 1116 | * depend on TIF_32BIT which is only updated in flush_thread() on | 1117 | * depend on TIF_32BIT which is only updated in flush_thread() on |
diff --git a/fs/exofs/file.c b/fs/exofs/file.c index 491c6c078e7f..71bf8e4fb5d4 100644 --- a/fs/exofs/file.c +++ b/fs/exofs/file.c | |||
| @@ -67,17 +67,17 @@ static int exofs_flush(struct file *file, fl_owner_t id) | |||
| 67 | 67 | ||
| 68 | const struct file_operations exofs_file_operations = { | 68 | const struct file_operations exofs_file_operations = { |
| 69 | .llseek = generic_file_llseek, | 69 | .llseek = generic_file_llseek, |
| 70 | .read = do_sync_read, | 70 | .read = new_sync_read, |
| 71 | .write = do_sync_write, | 71 | .write = new_sync_write, |
| 72 | .aio_read = generic_file_aio_read, | 72 | .read_iter = generic_file_read_iter, |
| 73 | .aio_write = generic_file_aio_write, | 73 | .write_iter = generic_file_write_iter, |
| 74 | .mmap = generic_file_mmap, | 74 | .mmap = generic_file_mmap, |
| 75 | .open = generic_file_open, | 75 | .open = generic_file_open, |
| 76 | .release = exofs_release_file, | 76 | .release = exofs_release_file, |
| 77 | .fsync = exofs_file_fsync, | 77 | .fsync = exofs_file_fsync, |
| 78 | .flush = exofs_flush, | 78 | .flush = exofs_flush, |
| 79 | .splice_read = generic_file_splice_read, | 79 | .splice_read = generic_file_splice_read, |
| 80 | .splice_write = generic_file_splice_write, | 80 | .splice_write = iter_file_splice_write, |
| 81 | }; | 81 | }; |
| 82 | 82 | ||
| 83 | const struct inode_operations exofs_file_inode_operations = { | 83 | const struct inode_operations exofs_file_inode_operations = { |
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index d1c244d67667..3f9cafd73931 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c | |||
| @@ -964,7 +964,7 @@ static void exofs_invalidatepage(struct page *page, unsigned int offset, | |||
| 964 | 964 | ||
| 965 | /* TODO: Should be easy enough to do proprly */ | 965 | /* TODO: Should be easy enough to do proprly */ |
| 966 | static ssize_t exofs_direct_IO(int rw, struct kiocb *iocb, | 966 | static ssize_t exofs_direct_IO(int rw, struct kiocb *iocb, |
| 967 | const struct iovec *iov, loff_t offset, unsigned long nr_segs) | 967 | struct iov_iter *iter, loff_t offset) |
| 968 | { | 968 | { |
| 969 | return 0; | 969 | return 0; |
| 970 | } | 970 | } |
diff --git a/fs/ext2/file.c b/fs/ext2/file.c index 44c36e590765..7c87b22a7228 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c | |||
| @@ -62,10 +62,10 @@ int ext2_fsync(struct file *file, loff_t start, loff_t end, int datasync) | |||
| 62 | */ | 62 | */ |
| 63 | const struct file_operations ext2_file_operations = { | 63 | const struct file_operations ext2_file_operations = { |
| 64 | .llseek = generic_file_llseek, | 64 | .llseek = generic_file_llseek, |
| 65 | .read = do_sync_read, | 65 | .read = new_sync_read, |
| 66 | .write = do_sync_write, | 66 | .write = new_sync_write, |
| 67 | .aio_read = generic_file_aio_read, | 67 | .read_iter = generic_file_read_iter, |
| 68 | .aio_write = generic_file_aio_write, | 68 | .write_iter = generic_file_write_iter, |
| 69 | .unlocked_ioctl = ext2_ioctl, | 69 | .unlocked_ioctl = ext2_ioctl, |
| 70 | #ifdef CONFIG_COMPAT | 70 | #ifdef CONFIG_COMPAT |
| 71 | .compat_ioctl = ext2_compat_ioctl, | 71 | .compat_ioctl = ext2_compat_ioctl, |
| @@ -75,7 +75,7 @@ const struct file_operations ext2_file_operations = { | |||
| 75 | .release = ext2_release_file, | 75 | .release = ext2_release_file, |
| 76 | .fsync = ext2_fsync, | 76 | .fsync = ext2_fsync, |
| 77 | .splice_read = generic_file_splice_read, | 77 | .splice_read = generic_file_splice_read, |
| 78 | .splice_write = generic_file_splice_write, | 78 | .splice_write = iter_file_splice_write, |
| 79 | }; | 79 | }; |
| 80 | 80 | ||
| 81 | #ifdef CONFIG_EXT2_FS_XIP | 81 | #ifdef CONFIG_EXT2_FS_XIP |
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index b1d2a4675d42..36d35c36311d 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c | |||
| @@ -850,18 +850,18 @@ static sector_t ext2_bmap(struct address_space *mapping, sector_t block) | |||
| 850 | } | 850 | } |
| 851 | 851 | ||
| 852 | static ssize_t | 852 | static ssize_t |
| 853 | ext2_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, | 853 | ext2_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, |
| 854 | loff_t offset, unsigned long nr_segs) | 854 | loff_t offset) |
| 855 | { | 855 | { |
| 856 | struct file *file = iocb->ki_filp; | 856 | struct file *file = iocb->ki_filp; |
| 857 | struct address_space *mapping = file->f_mapping; | 857 | struct address_space *mapping = file->f_mapping; |
| 858 | struct inode *inode = mapping->host; | 858 | struct inode *inode = mapping->host; |
| 859 | size_t count = iov_iter_count(iter); | ||
| 859 | ssize_t ret; | 860 | ssize_t ret; |
| 860 | 861 | ||
| 861 | ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, | 862 | ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, ext2_get_block); |
| 862 | ext2_get_block); | ||
| 863 | if (ret < 0 && (rw & WRITE)) | 863 | if (ret < 0 && (rw & WRITE)) |
| 864 | ext2_write_failed(mapping, offset + iov_length(iov, nr_segs)); | 864 | ext2_write_failed(mapping, offset + count); |
| 865 | return ret; | 865 | return ret; |
| 866 | } | 866 | } |
| 867 | 867 | ||
diff --git a/fs/ext3/file.c b/fs/ext3/file.c index aad05311392a..a062fa1e1b11 100644 --- a/fs/ext3/file.c +++ b/fs/ext3/file.c | |||
| @@ -50,10 +50,10 @@ static int ext3_release_file (struct inode * inode, struct file * filp) | |||
| 50 | 50 | ||
| 51 | const struct file_operations ext3_file_operations = { | 51 | const struct file_operations ext3_file_operations = { |
| 52 | .llseek = generic_file_llseek, | 52 | .llseek = generic_file_llseek, |
| 53 | .read = do_sync_read, | 53 | .read = new_sync_read, |
| 54 | .write = do_sync_write, | 54 | .write = new_sync_write, |
| 55 | .aio_read = generic_file_aio_read, | 55 | .read_iter = generic_file_read_iter, |
| 56 | .aio_write = generic_file_aio_write, | 56 | .write_iter = generic_file_write_iter, |
| 57 | .unlocked_ioctl = ext3_ioctl, | 57 | .unlocked_ioctl = ext3_ioctl, |
| 58 | #ifdef CONFIG_COMPAT | 58 | #ifdef CONFIG_COMPAT |
| 59 | .compat_ioctl = ext3_compat_ioctl, | 59 | .compat_ioctl = ext3_compat_ioctl, |
| @@ -63,7 +63,7 @@ const struct file_operations ext3_file_operations = { | |||
| 63 | .release = ext3_release_file, | 63 | .release = ext3_release_file, |
| 64 | .fsync = ext3_sync_file, | 64 | .fsync = ext3_sync_file, |
| 65 | .splice_read = generic_file_splice_read, | 65 | .splice_read = generic_file_splice_read, |
| 66 | .splice_write = generic_file_splice_write, | 66 | .splice_write = iter_file_splice_write, |
| 67 | }; | 67 | }; |
| 68 | 68 | ||
| 69 | const struct inode_operations ext3_file_inode_operations = { | 69 | const struct inode_operations ext3_file_inode_operations = { |
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 695abe738a24..2c6ccc49ba27 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c | |||
| @@ -1821,8 +1821,7 @@ static int ext3_releasepage(struct page *page, gfp_t wait) | |||
| 1821 | * VFS code falls back into buffered path in that case so we are safe. | 1821 | * VFS code falls back into buffered path in that case so we are safe. |
| 1822 | */ | 1822 | */ |
| 1823 | static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb, | 1823 | static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb, |
| 1824 | const struct iovec *iov, loff_t offset, | 1824 | struct iov_iter *iter, loff_t offset) |
| 1825 | unsigned long nr_segs) | ||
| 1826 | { | 1825 | { |
| 1827 | struct file *file = iocb->ki_filp; | 1826 | struct file *file = iocb->ki_filp; |
| 1828 | struct inode *inode = file->f_mapping->host; | 1827 | struct inode *inode = file->f_mapping->host; |
| @@ -1830,10 +1829,10 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb, | |||
| 1830 | handle_t *handle; | 1829 | handle_t *handle; |
| 1831 | ssize_t ret; | 1830 | ssize_t ret; |
| 1832 | int orphan = 0; | 1831 | int orphan = 0; |
| 1833 | size_t count = iov_length(iov, nr_segs); | 1832 | size_t count = iov_iter_count(iter); |
| 1834 | int retries = 0; | 1833 | int retries = 0; |
| 1835 | 1834 | ||
| 1836 | trace_ext3_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw); | 1835 | trace_ext3_direct_IO_enter(inode, offset, count, rw); |
| 1837 | 1836 | ||
| 1838 | if (rw == WRITE) { | 1837 | if (rw == WRITE) { |
| 1839 | loff_t final_size = offset + count; | 1838 | loff_t final_size = offset + count; |
| @@ -1857,15 +1856,14 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb, | |||
| 1857 | } | 1856 | } |
| 1858 | 1857 | ||
| 1859 | retry: | 1858 | retry: |
| 1860 | ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, | 1859 | ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, ext3_get_block); |
| 1861 | ext3_get_block); | ||
| 1862 | /* | 1860 | /* |
| 1863 | * In case of error extending write may have instantiated a few | 1861 | * In case of error extending write may have instantiated a few |
| 1864 | * blocks outside i_size. Trim these off again. | 1862 | * blocks outside i_size. Trim these off again. |
| 1865 | */ | 1863 | */ |
| 1866 | if (unlikely((rw & WRITE) && ret < 0)) { | 1864 | if (unlikely((rw & WRITE) && ret < 0)) { |
| 1867 | loff_t isize = i_size_read(inode); | 1865 | loff_t isize = i_size_read(inode); |
| 1868 | loff_t end = offset + iov_length(iov, nr_segs); | 1866 | loff_t end = offset + count; |
| 1869 | 1867 | ||
| 1870 | if (end > isize) | 1868 | if (end > isize) |
| 1871 | ext3_truncate_failed_direct_write(inode); | 1869 | ext3_truncate_failed_direct_write(inode); |
| @@ -1910,8 +1908,7 @@ retry: | |||
| 1910 | ret = err; | 1908 | ret = err; |
| 1911 | } | 1909 | } |
| 1912 | out: | 1910 | out: |
| 1913 | trace_ext3_direct_IO_exit(inode, offset, | 1911 | trace_ext3_direct_IO_exit(inode, offset, count, rw, ret); |
| 1914 | iov_length(iov, nr_segs), rw, ret); | ||
| 1915 | return ret; | 1912 | return ret; |
| 1916 | } | 1913 | } |
| 1917 | 1914 | ||
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 0762d143e252..fca382037ddd 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c | |||
| @@ -194,7 +194,16 @@ static void ext4_init_block_bitmap(struct super_block *sb, | |||
| 194 | if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) { | 194 | if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) { |
| 195 | ext4_error(sb, "Checksum bad for group %u", block_group); | 195 | ext4_error(sb, "Checksum bad for group %u", block_group); |
| 196 | grp = ext4_get_group_info(sb, block_group); | 196 | grp = ext4_get_group_info(sb, block_group); |
| 197 | if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp)) | ||
| 198 | percpu_counter_sub(&sbi->s_freeclusters_counter, | ||
| 199 | grp->bb_free); | ||
| 197 | set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state); | 200 | set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state); |
| 201 | if (!EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) { | ||
| 202 | int count; | ||
| 203 | count = ext4_free_inodes_count(sb, gdp); | ||
| 204 | percpu_counter_sub(&sbi->s_freeinodes_counter, | ||
| 205 | count); | ||
| 206 | } | ||
| 198 | set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state); | 207 | set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state); |
| 199 | return; | 208 | return; |
| 200 | } | 209 | } |
| @@ -359,6 +368,7 @@ static void ext4_validate_block_bitmap(struct super_block *sb, | |||
| 359 | { | 368 | { |
| 360 | ext4_fsblk_t blk; | 369 | ext4_fsblk_t blk; |
| 361 | struct ext4_group_info *grp = ext4_get_group_info(sb, block_group); | 370 | struct ext4_group_info *grp = ext4_get_group_info(sb, block_group); |
| 371 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
| 362 | 372 | ||
| 363 | if (buffer_verified(bh)) | 373 | if (buffer_verified(bh)) |
| 364 | return; | 374 | return; |
| @@ -369,6 +379,9 @@ static void ext4_validate_block_bitmap(struct super_block *sb, | |||
| 369 | ext4_unlock_group(sb, block_group); | 379 | ext4_unlock_group(sb, block_group); |
| 370 | ext4_error(sb, "bg %u: block %llu: invalid block bitmap", | 380 | ext4_error(sb, "bg %u: block %llu: invalid block bitmap", |
| 371 | block_group, blk); | 381 | block_group, blk); |
| 382 | if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp)) | ||
| 383 | percpu_counter_sub(&sbi->s_freeclusters_counter, | ||
| 384 | grp->bb_free); | ||
| 372 | set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state); | 385 | set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state); |
| 373 | return; | 386 | return; |
| 374 | } | 387 | } |
| @@ -376,6 +389,9 @@ static void ext4_validate_block_bitmap(struct super_block *sb, | |||
| 376 | desc, bh))) { | 389 | desc, bh))) { |
| 377 | ext4_unlock_group(sb, block_group); | 390 | ext4_unlock_group(sb, block_group); |
| 378 | ext4_error(sb, "bg %u: bad block bitmap checksum", block_group); | 391 | ext4_error(sb, "bg %u: bad block bitmap checksum", block_group); |
| 392 | if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp)) | ||
| 393 | percpu_counter_sub(&sbi->s_freeclusters_counter, | ||
| 394 | grp->bb_free); | ||
| 379 | set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state); | 395 | set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state); |
| 380 | return; | 396 | return; |
| 381 | } | 397 | } |
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 1479e2ae00d2..7cc5a0e23688 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
| @@ -2140,8 +2140,7 @@ extern void ext4_da_update_reserve_space(struct inode *inode, | |||
| 2140 | extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, | 2140 | extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, |
| 2141 | struct ext4_map_blocks *map, int flags); | 2141 | struct ext4_map_blocks *map, int flags); |
| 2142 | extern ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, | 2142 | extern ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, |
| 2143 | const struct iovec *iov, loff_t offset, | 2143 | struct iov_iter *iter, loff_t offset); |
| 2144 | unsigned long nr_segs); | ||
| 2145 | extern int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock); | 2144 | extern int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock); |
| 2146 | extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks); | 2145 | extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks); |
| 2147 | extern void ext4_ind_truncate(handle_t *, struct inode *inode); | 2146 | extern void ext4_ind_truncate(handle_t *, struct inode *inode); |
diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 4e8bc284ec0e..8695f70af1ef 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c | |||
| @@ -74,26 +74,22 @@ static void ext4_unwritten_wait(struct inode *inode) | |||
| 74 | * or one thread will zero the other's data, causing corruption. | 74 | * or one thread will zero the other's data, causing corruption. |
| 75 | */ | 75 | */ |
| 76 | static int | 76 | static int |
| 77 | ext4_unaligned_aio(struct inode *inode, const struct iovec *iov, | 77 | ext4_unaligned_aio(struct inode *inode, struct iov_iter *from, loff_t pos) |
| 78 | unsigned long nr_segs, loff_t pos) | ||
| 79 | { | 78 | { |
| 80 | struct super_block *sb = inode->i_sb; | 79 | struct super_block *sb = inode->i_sb; |
| 81 | int blockmask = sb->s_blocksize - 1; | 80 | int blockmask = sb->s_blocksize - 1; |
| 82 | size_t count = iov_length(iov, nr_segs); | ||
| 83 | loff_t final_size = pos + count; | ||
| 84 | 81 | ||
| 85 | if (pos >= i_size_read(inode)) | 82 | if (pos >= i_size_read(inode)) |
| 86 | return 0; | 83 | return 0; |
| 87 | 84 | ||
| 88 | if ((pos & blockmask) || (final_size & blockmask)) | 85 | if ((pos | iov_iter_alignment(from)) & blockmask) |
| 89 | return 1; | 86 | return 1; |
| 90 | 87 | ||
| 91 | return 0; | 88 | return 0; |
| 92 | } | 89 | } |
| 93 | 90 | ||
| 94 | static ssize_t | 91 | static ssize_t |
| 95 | ext4_file_write(struct kiocb *iocb, const struct iovec *iov, | 92 | ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from) |
| 96 | unsigned long nr_segs, loff_t pos) | ||
| 97 | { | 93 | { |
| 98 | struct file *file = iocb->ki_filp; | 94 | struct file *file = iocb->ki_filp; |
| 99 | struct inode *inode = file_inode(iocb->ki_filp); | 95 | struct inode *inode = file_inode(iocb->ki_filp); |
| @@ -101,10 +97,9 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov, | |||
| 101 | struct blk_plug plug; | 97 | struct blk_plug plug; |
| 102 | int o_direct = file->f_flags & O_DIRECT; | 98 | int o_direct = file->f_flags & O_DIRECT; |
| 103 | int overwrite = 0; | 99 | int overwrite = 0; |
| 104 | size_t length = iov_length(iov, nr_segs); | 100 | size_t length = iov_iter_count(from); |
| 105 | ssize_t ret; | 101 | ssize_t ret; |
| 106 | 102 | loff_t pos = iocb->ki_pos; | |
| 107 | BUG_ON(iocb->ki_pos != pos); | ||
| 108 | 103 | ||
| 109 | /* | 104 | /* |
| 110 | * Unaligned direct AIO must be serialized; see comment above | 105 | * Unaligned direct AIO must be serialized; see comment above |
| @@ -114,7 +109,7 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov, | |||
| 114 | ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) && | 109 | ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) && |
| 115 | !is_sync_kiocb(iocb) && | 110 | !is_sync_kiocb(iocb) && |
| 116 | (file->f_flags & O_APPEND || | 111 | (file->f_flags & O_APPEND || |
| 117 | ext4_unaligned_aio(inode, iov, nr_segs, pos))) { | 112 | ext4_unaligned_aio(inode, from, pos))) { |
| 118 | aio_mutex = ext4_aio_mutex(inode); | 113 | aio_mutex = ext4_aio_mutex(inode); |
| 119 | mutex_lock(aio_mutex); | 114 | mutex_lock(aio_mutex); |
| 120 | ext4_unwritten_wait(inode); | 115 | ext4_unwritten_wait(inode); |
| @@ -138,10 +133,8 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov, | |||
| 138 | goto errout; | 133 | goto errout; |
| 139 | } | 134 | } |
| 140 | 135 | ||
| 141 | if (pos + length > sbi->s_bitmap_maxbytes) { | 136 | if (pos + length > sbi->s_bitmap_maxbytes) |
| 142 | nr_segs = iov_shorten((struct iovec *)iov, nr_segs, | 137 | iov_iter_truncate(from, sbi->s_bitmap_maxbytes - pos); |
| 143 | sbi->s_bitmap_maxbytes - pos); | ||
| 144 | } | ||
| 145 | } | 138 | } |
| 146 | 139 | ||
| 147 | if (o_direct) { | 140 | if (o_direct) { |
| @@ -179,7 +172,7 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov, | |||
| 179 | } | 172 | } |
| 180 | } | 173 | } |
| 181 | 174 | ||
| 182 | ret = __generic_file_aio_write(iocb, iov, nr_segs); | 175 | ret = __generic_file_write_iter(iocb, from); |
| 183 | mutex_unlock(&inode->i_mutex); | 176 | mutex_unlock(&inode->i_mutex); |
| 184 | 177 | ||
| 185 | if (ret > 0) { | 178 | if (ret > 0) { |
| @@ -594,10 +587,10 @@ loff_t ext4_llseek(struct file *file, loff_t offset, int whence) | |||
| 594 | 587 | ||
| 595 | const struct file_operations ext4_file_operations = { | 588 | const struct file_operations ext4_file_operations = { |
| 596 | .llseek = ext4_llseek, | 589 | .llseek = ext4_llseek, |
| 597 | .read = do_sync_read, | 590 | .read = new_sync_read, |
| 598 | .write = do_sync_write, | 591 | .write = new_sync_write, |
| 599 | .aio_read = generic_file_aio_read, | 592 | .read_iter = generic_file_read_iter, |
| 600 | .aio_write = ext4_file_write, | 593 | .write_iter = ext4_file_write_iter, |
| 601 | .unlocked_ioctl = ext4_ioctl, | 594 | .unlocked_ioctl = ext4_ioctl, |
| 602 | #ifdef CONFIG_COMPAT | 595 | #ifdef CONFIG_COMPAT |
| 603 | .compat_ioctl = ext4_compat_ioctl, | 596 | .compat_ioctl = ext4_compat_ioctl, |
| @@ -607,7 +600,7 @@ const struct file_operations ext4_file_operations = { | |||
| 607 | .release = ext4_release_file, | 600 | .release = ext4_release_file, |
| 608 | .fsync = ext4_sync_file, | 601 | .fsync = ext4_sync_file, |
| 609 | .splice_read = generic_file_splice_read, | 602 | .splice_read = generic_file_splice_read, |
| 610 | .splice_write = generic_file_splice_write, | 603 | .splice_write = iter_file_splice_write, |
| 611 | .fallocate = ext4_fallocate, | 604 | .fallocate = ext4_fallocate, |
| 612 | }; | 605 | }; |
| 613 | 606 | ||
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 0ee59a6644e2..a87455df38bc 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c | |||
| @@ -71,6 +71,7 @@ static unsigned ext4_init_inode_bitmap(struct super_block *sb, | |||
| 71 | struct ext4_group_desc *gdp) | 71 | struct ext4_group_desc *gdp) |
| 72 | { | 72 | { |
| 73 | struct ext4_group_info *grp; | 73 | struct ext4_group_info *grp; |
| 74 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
| 74 | J_ASSERT_BH(bh, buffer_locked(bh)); | 75 | J_ASSERT_BH(bh, buffer_locked(bh)); |
| 75 | 76 | ||
| 76 | /* If checksum is bad mark all blocks and inodes use to prevent | 77 | /* If checksum is bad mark all blocks and inodes use to prevent |
| @@ -78,7 +79,16 @@ static unsigned ext4_init_inode_bitmap(struct super_block *sb, | |||
| 78 | if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) { | 79 | if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) { |
| 79 | ext4_error(sb, "Checksum bad for group %u", block_group); | 80 | ext4_error(sb, "Checksum bad for group %u", block_group); |
| 80 | grp = ext4_get_group_info(sb, block_group); | 81 | grp = ext4_get_group_info(sb, block_group); |
| 82 | if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp)) | ||
| 83 | percpu_counter_sub(&sbi->s_freeclusters_counter, | ||
| 84 | grp->bb_free); | ||
| 81 | set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state); | 85 | set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state); |
| 86 | if (!EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) { | ||
| 87 | int count; | ||
| 88 | count = ext4_free_inodes_count(sb, gdp); | ||
| 89 | percpu_counter_sub(&sbi->s_freeinodes_counter, | ||
| 90 | count); | ||
| 91 | } | ||
| 82 | set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state); | 92 | set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state); |
| 83 | return 0; | 93 | return 0; |
| 84 | } | 94 | } |
| @@ -116,6 +126,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) | |||
| 116 | struct buffer_head *bh = NULL; | 126 | struct buffer_head *bh = NULL; |
| 117 | ext4_fsblk_t bitmap_blk; | 127 | ext4_fsblk_t bitmap_blk; |
| 118 | struct ext4_group_info *grp; | 128 | struct ext4_group_info *grp; |
| 129 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
| 119 | 130 | ||
| 120 | desc = ext4_get_group_desc(sb, block_group, NULL); | 131 | desc = ext4_get_group_desc(sb, block_group, NULL); |
| 121 | if (!desc) | 132 | if (!desc) |
| @@ -185,6 +196,12 @@ verify: | |||
| 185 | ext4_error(sb, "Corrupt inode bitmap - block_group = %u, " | 196 | ext4_error(sb, "Corrupt inode bitmap - block_group = %u, " |
| 186 | "inode_bitmap = %llu", block_group, bitmap_blk); | 197 | "inode_bitmap = %llu", block_group, bitmap_blk); |
| 187 | grp = ext4_get_group_info(sb, block_group); | 198 | grp = ext4_get_group_info(sb, block_group); |
| 199 | if (!EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) { | ||
| 200 | int count; | ||
| 201 | count = ext4_free_inodes_count(sb, desc); | ||
| 202 | percpu_counter_sub(&sbi->s_freeinodes_counter, | ||
| 203 | count); | ||
| 204 | } | ||
| 188 | set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state); | 205 | set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state); |
| 189 | return NULL; | 206 | return NULL; |
| 190 | } | 207 | } |
| @@ -321,6 +338,12 @@ out: | |||
| 321 | fatal = err; | 338 | fatal = err; |
| 322 | } else { | 339 | } else { |
| 323 | ext4_error(sb, "bit already cleared for inode %lu", ino); | 340 | ext4_error(sb, "bit already cleared for inode %lu", ino); |
| 341 | if (!EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) { | ||
| 342 | int count; | ||
| 343 | count = ext4_free_inodes_count(sb, gdp); | ||
| 344 | percpu_counter_sub(&sbi->s_freeinodes_counter, | ||
| 345 | count); | ||
| 346 | } | ||
| 324 | set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state); | 347 | set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state); |
| 325 | } | 348 | } |
| 326 | 349 | ||
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index 594009f5f523..fd69da194826 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c | |||
| @@ -389,7 +389,13 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode, | |||
| 389 | return 0; | 389 | return 0; |
| 390 | failed: | 390 | failed: |
| 391 | for (; i >= 0; i--) { | 391 | for (; i >= 0; i--) { |
| 392 | if (i != indirect_blks && branch[i].bh) | 392 | /* |
| 393 | * We want to ext4_forget() only freshly allocated indirect | ||
| 394 | * blocks. Buffer for new_blocks[i-1] is at branch[i].bh and | ||
| 395 | * buffer at branch[0].bh is indirect block / inode already | ||
| 396 | * existing before ext4_alloc_branch() was called. | ||
| 397 | */ | ||
| 398 | if (i > 0 && i != indirect_blks && branch[i].bh) | ||
| 393 | ext4_forget(handle, 1, inode, branch[i].bh, | 399 | ext4_forget(handle, 1, inode, branch[i].bh, |
| 394 | branch[i].bh->b_blocknr); | 400 | branch[i].bh->b_blocknr); |
| 395 | ext4_free_blocks(handle, inode, NULL, new_blocks[i], | 401 | ext4_free_blocks(handle, inode, NULL, new_blocks[i], |
| @@ -639,8 +645,7 @@ out: | |||
| 639 | * VFS code falls back into buffered path in that case so we are safe. | 645 | * VFS code falls back into buffered path in that case so we are safe. |
| 640 | */ | 646 | */ |
| 641 | ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, | 647 | ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, |
| 642 | const struct iovec *iov, loff_t offset, | 648 | struct iov_iter *iter, loff_t offset) |
| 643 | unsigned long nr_segs) | ||
| 644 | { | 649 | { |
| 645 | struct file *file = iocb->ki_filp; | 650 | struct file *file = iocb->ki_filp; |
| 646 | struct inode *inode = file->f_mapping->host; | 651 | struct inode *inode = file->f_mapping->host; |
| @@ -648,7 +653,7 @@ ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, | |||
| 648 | handle_t *handle; | 653 | handle_t *handle; |
| 649 | ssize_t ret; | 654 | ssize_t ret; |
| 650 | int orphan = 0; | 655 | int orphan = 0; |
| 651 | size_t count = iov_length(iov, nr_segs); | 656 | size_t count = iov_iter_count(iter); |
| 652 | int retries = 0; | 657 | int retries = 0; |
| 653 | 658 | ||
| 654 | if (rw == WRITE) { | 659 | if (rw == WRITE) { |
| @@ -687,18 +692,17 @@ retry: | |||
| 687 | goto locked; | 692 | goto locked; |
| 688 | } | 693 | } |
| 689 | ret = __blockdev_direct_IO(rw, iocb, inode, | 694 | ret = __blockdev_direct_IO(rw, iocb, inode, |
| 690 | inode->i_sb->s_bdev, iov, | 695 | inode->i_sb->s_bdev, iter, offset, |
| 691 | offset, nr_segs, | ||
| 692 | ext4_get_block, NULL, NULL, 0); | 696 | ext4_get_block, NULL, NULL, 0); |
| 693 | inode_dio_done(inode); | 697 | inode_dio_done(inode); |
| 694 | } else { | 698 | } else { |
| 695 | locked: | 699 | locked: |
| 696 | ret = blockdev_direct_IO(rw, iocb, inode, iov, | 700 | ret = blockdev_direct_IO(rw, iocb, inode, iter, |
| 697 | offset, nr_segs, ext4_get_block); | 701 | offset, ext4_get_block); |
| 698 | 702 | ||
| 699 | if (unlikely((rw & WRITE) && ret < 0)) { | 703 | if (unlikely((rw & WRITE) && ret < 0)) { |
| 700 | loff_t isize = i_size_read(inode); | 704 | loff_t isize = i_size_read(inode); |
| 701 | loff_t end = offset + iov_length(iov, nr_segs); | 705 | loff_t end = offset + count; |
| 702 | 706 | ||
| 703 | if (end > isize) | 707 | if (end > isize) |
| 704 | ext4_truncate_failed_write(inode); | 708 | ext4_truncate_failed_write(inode); |
| @@ -1312,16 +1316,24 @@ static int free_hole_blocks(handle_t *handle, struct inode *inode, | |||
| 1312 | blk = *i_data; | 1316 | blk = *i_data; |
| 1313 | if (level > 0) { | 1317 | if (level > 0) { |
| 1314 | ext4_lblk_t first2; | 1318 | ext4_lblk_t first2; |
| 1319 | ext4_lblk_t count2; | ||
| 1320 | |||
| 1315 | bh = sb_bread(inode->i_sb, le32_to_cpu(blk)); | 1321 | bh = sb_bread(inode->i_sb, le32_to_cpu(blk)); |
| 1316 | if (!bh) { | 1322 | if (!bh) { |
| 1317 | EXT4_ERROR_INODE_BLOCK(inode, le32_to_cpu(blk), | 1323 | EXT4_ERROR_INODE_BLOCK(inode, le32_to_cpu(blk), |
| 1318 | "Read failure"); | 1324 | "Read failure"); |
| 1319 | return -EIO; | 1325 | return -EIO; |
| 1320 | } | 1326 | } |
| 1321 | first2 = (first > offset) ? first - offset : 0; | 1327 | if (first > offset) { |
| 1328 | first2 = first - offset; | ||
| 1329 | count2 = count; | ||
| 1330 | } else { | ||
| 1331 | first2 = 0; | ||
| 1332 | count2 = count - (offset - first); | ||
| 1333 | } | ||
| 1322 | ret = free_hole_blocks(handle, inode, bh, | 1334 | ret = free_hole_blocks(handle, inode, bh, |
| 1323 | (__le32 *)bh->b_data, level - 1, | 1335 | (__le32 *)bh->b_data, level - 1, |
| 1324 | first2, count - offset, | 1336 | first2, count2, |
| 1325 | inode->i_sb->s_blocksize >> 2); | 1337 | inode->i_sb->s_blocksize >> 2); |
| 1326 | if (ret) { | 1338 | if (ret) { |
| 1327 | brelse(bh); | 1339 | brelse(bh); |
| @@ -1331,8 +1343,8 @@ static int free_hole_blocks(handle_t *handle, struct inode *inode, | |||
| 1331 | if (level == 0 || | 1343 | if (level == 0 || |
| 1332 | (bh && all_zeroes((__le32 *)bh->b_data, | 1344 | (bh && all_zeroes((__le32 *)bh->b_data, |
| 1333 | (__le32 *)bh->b_data + addr_per_block))) { | 1345 | (__le32 *)bh->b_data + addr_per_block))) { |
| 1334 | ext4_free_data(handle, inode, parent_bh, &blk, &blk+1); | 1346 | ext4_free_data(handle, inode, parent_bh, |
| 1335 | *i_data = 0; | 1347 | i_data, i_data + 1); |
| 1336 | } | 1348 | } |
| 1337 | brelse(bh); | 1349 | brelse(bh); |
| 1338 | bh = NULL; | 1350 | bh = NULL; |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 7fcd68ee9155..8a064734e6eb 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
| @@ -3093,13 +3093,12 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, | |||
| 3093 | * | 3093 | * |
| 3094 | */ | 3094 | */ |
| 3095 | static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | 3095 | static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, |
| 3096 | const struct iovec *iov, loff_t offset, | 3096 | struct iov_iter *iter, loff_t offset) |
| 3097 | unsigned long nr_segs) | ||
| 3098 | { | 3097 | { |
| 3099 | struct file *file = iocb->ki_filp; | 3098 | struct file *file = iocb->ki_filp; |
| 3100 | struct inode *inode = file->f_mapping->host; | 3099 | struct inode *inode = file->f_mapping->host; |
| 3101 | ssize_t ret; | 3100 | ssize_t ret; |
| 3102 | size_t count = iov_length(iov, nr_segs); | 3101 | size_t count = iov_iter_count(iter); |
| 3103 | int overwrite = 0; | 3102 | int overwrite = 0; |
| 3104 | get_block_t *get_block_func = NULL; | 3103 | get_block_t *get_block_func = NULL; |
| 3105 | int dio_flags = 0; | 3104 | int dio_flags = 0; |
| @@ -3108,7 +3107,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
| 3108 | 3107 | ||
| 3109 | /* Use the old path for reads and writes beyond i_size. */ | 3108 | /* Use the old path for reads and writes beyond i_size. */ |
| 3110 | if (rw != WRITE || final_size > inode->i_size) | 3109 | if (rw != WRITE || final_size > inode->i_size) |
| 3111 | return ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs); | 3110 | return ext4_ind_direct_IO(rw, iocb, iter, offset); |
| 3112 | 3111 | ||
| 3113 | BUG_ON(iocb->private == NULL); | 3112 | BUG_ON(iocb->private == NULL); |
| 3114 | 3113 | ||
| @@ -3175,8 +3174,8 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
| 3175 | dio_flags = DIO_LOCKING; | 3174 | dio_flags = DIO_LOCKING; |
| 3176 | } | 3175 | } |
| 3177 | ret = __blockdev_direct_IO(rw, iocb, inode, | 3176 | ret = __blockdev_direct_IO(rw, iocb, inode, |
| 3178 | inode->i_sb->s_bdev, iov, | 3177 | inode->i_sb->s_bdev, iter, |
| 3179 | offset, nr_segs, | 3178 | offset, |
| 3180 | get_block_func, | 3179 | get_block_func, |
| 3181 | ext4_end_io_dio, | 3180 | ext4_end_io_dio, |
| 3182 | NULL, | 3181 | NULL, |
| @@ -3230,11 +3229,11 @@ retake_lock: | |||
| 3230 | } | 3229 | } |
| 3231 | 3230 | ||
| 3232 | static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb, | 3231 | static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb, |
| 3233 | const struct iovec *iov, loff_t offset, | 3232 | struct iov_iter *iter, loff_t offset) |
| 3234 | unsigned long nr_segs) | ||
| 3235 | { | 3233 | { |
| 3236 | struct file *file = iocb->ki_filp; | 3234 | struct file *file = iocb->ki_filp; |
| 3237 | struct inode *inode = file->f_mapping->host; | 3235 | struct inode *inode = file->f_mapping->host; |
| 3236 | size_t count = iov_iter_count(iter); | ||
| 3238 | ssize_t ret; | 3237 | ssize_t ret; |
| 3239 | 3238 | ||
| 3240 | /* | 3239 | /* |
| @@ -3247,13 +3246,12 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb, | |||
| 3247 | if (ext4_has_inline_data(inode)) | 3246 | if (ext4_has_inline_data(inode)) |
| 3248 | return 0; | 3247 | return 0; |
| 3249 | 3248 | ||
| 3250 | trace_ext4_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw); | 3249 | trace_ext4_direct_IO_enter(inode, offset, count, rw); |
| 3251 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) | 3250 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) |
| 3252 | ret = ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs); | 3251 | ret = ext4_ext_direct_IO(rw, iocb, iter, offset); |
| 3253 | else | 3252 | else |
| 3254 | ret = ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs); | 3253 | ret = ext4_ind_direct_IO(rw, iocb, iter, offset); |
| 3255 | trace_ext4_direct_IO_exit(inode, offset, | 3254 | trace_ext4_direct_IO_exit(inode, offset, count, rw, ret); |
| 3256 | iov_length(iov, nr_segs), rw, ret); | ||
| 3257 | return ret; | 3255 | return ret; |
| 3258 | } | 3256 | } |
| 3259 | 3257 | ||
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 59e31622cc6e..7f72f50a8fa7 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
| @@ -722,6 +722,7 @@ void ext4_mb_generate_buddy(struct super_block *sb, | |||
| 722 | void *buddy, void *bitmap, ext4_group_t group) | 722 | void *buddy, void *bitmap, ext4_group_t group) |
| 723 | { | 723 | { |
| 724 | struct ext4_group_info *grp = ext4_get_group_info(sb, group); | 724 | struct ext4_group_info *grp = ext4_get_group_info(sb, group); |
| 725 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
| 725 | ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb); | 726 | ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb); |
| 726 | ext4_grpblk_t i = 0; | 727 | ext4_grpblk_t i = 0; |
| 727 | ext4_grpblk_t first; | 728 | ext4_grpblk_t first; |
| @@ -759,6 +760,9 @@ void ext4_mb_generate_buddy(struct super_block *sb, | |||
| 759 | * corrupt and update bb_free using bitmap value | 760 | * corrupt and update bb_free using bitmap value |
| 760 | */ | 761 | */ |
| 761 | grp->bb_free = free; | 762 | grp->bb_free = free; |
| 763 | if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp)) | ||
| 764 | percpu_counter_sub(&sbi->s_freeclusters_counter, | ||
| 765 | grp->bb_free); | ||
| 762 | set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state); | 766 | set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state); |
| 763 | } | 767 | } |
| 764 | mb_set_largest_free_order(sb, grp); | 768 | mb_set_largest_free_order(sb, grp); |
| @@ -1431,6 +1435,7 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, | |||
| 1431 | right_is_free = !mb_test_bit(last + 1, e4b->bd_bitmap); | 1435 | right_is_free = !mb_test_bit(last + 1, e4b->bd_bitmap); |
| 1432 | 1436 | ||
| 1433 | if (unlikely(block != -1)) { | 1437 | if (unlikely(block != -1)) { |
| 1438 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
| 1434 | ext4_fsblk_t blocknr; | 1439 | ext4_fsblk_t blocknr; |
| 1435 | 1440 | ||
| 1436 | blocknr = ext4_group_first_block_no(sb, e4b->bd_group); | 1441 | blocknr = ext4_group_first_block_no(sb, e4b->bd_group); |
| @@ -1441,6 +1446,9 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, | |||
| 1441 | "freeing already freed block " | 1446 | "freeing already freed block " |
| 1442 | "(bit %u); block bitmap corrupt.", | 1447 | "(bit %u); block bitmap corrupt.", |
| 1443 | block); | 1448 | block); |
| 1449 | if (!EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info)) | ||
| 1450 | percpu_counter_sub(&sbi->s_freeclusters_counter, | ||
| 1451 | e4b->bd_info->bb_free); | ||
| 1444 | /* Mark the block group as corrupt. */ | 1452 | /* Mark the block group as corrupt. */ |
| 1445 | set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, | 1453 | set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, |
| 1446 | &e4b->bd_info->bb_state); | 1454 | &e4b->bd_info->bb_state); |
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index c1fb6dd10911..0924521306b4 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c | |||
| @@ -1017,10 +1017,9 @@ static int f2fs_write_end(struct file *file, | |||
| 1017 | } | 1017 | } |
| 1018 | 1018 | ||
| 1019 | static int check_direct_IO(struct inode *inode, int rw, | 1019 | static int check_direct_IO(struct inode *inode, int rw, |
| 1020 | const struct iovec *iov, loff_t offset, unsigned long nr_segs) | 1020 | struct iov_iter *iter, loff_t offset) |
| 1021 | { | 1021 | { |
| 1022 | unsigned blocksize_mask = inode->i_sb->s_blocksize - 1; | 1022 | unsigned blocksize_mask = inode->i_sb->s_blocksize - 1; |
| 1023 | int i; | ||
| 1024 | 1023 | ||
| 1025 | if (rw == READ) | 1024 | if (rw == READ) |
| 1026 | return 0; | 1025 | return 0; |
| @@ -1028,14 +1027,14 @@ static int check_direct_IO(struct inode *inode, int rw, | |||
| 1028 | if (offset & blocksize_mask) | 1027 | if (offset & blocksize_mask) |
| 1029 | return -EINVAL; | 1028 | return -EINVAL; |
| 1030 | 1029 | ||
| 1031 | for (i = 0; i < nr_segs; i++) | 1030 | if (iov_iter_alignment(iter) & blocksize_mask) |
| 1032 | if (iov[i].iov_len & blocksize_mask) | 1031 | return -EINVAL; |
| 1033 | return -EINVAL; | 1032 | |
| 1034 | return 0; | 1033 | return 0; |
| 1035 | } | 1034 | } |
| 1036 | 1035 | ||
| 1037 | static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, | 1036 | static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, |
| 1038 | const struct iovec *iov, loff_t offset, unsigned long nr_segs) | 1037 | struct iov_iter *iter, loff_t offset) |
| 1039 | { | 1038 | { |
| 1040 | struct file *file = iocb->ki_filp; | 1039 | struct file *file = iocb->ki_filp; |
| 1041 | struct inode *inode = file->f_mapping->host; | 1040 | struct inode *inode = file->f_mapping->host; |
| @@ -1044,14 +1043,14 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, | |||
| 1044 | if (f2fs_has_inline_data(inode)) | 1043 | if (f2fs_has_inline_data(inode)) |
| 1045 | return 0; | 1044 | return 0; |
| 1046 | 1045 | ||
| 1047 | if (check_direct_IO(inode, rw, iov, offset, nr_segs)) | 1046 | if (check_direct_IO(inode, rw, iter, offset)) |
| 1048 | return 0; | 1047 | return 0; |
| 1049 | 1048 | ||
| 1050 | /* clear fsync mark to recover these blocks */ | 1049 | /* clear fsync mark to recover these blocks */ |
| 1051 | fsync_mark_clear(F2FS_SB(inode->i_sb), inode->i_ino); | 1050 | fsync_mark_clear(F2FS_SB(inode->i_sb), inode->i_ino); |
| 1052 | 1051 | ||
| 1053 | return blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, | 1052 | return blockdev_direct_IO(rw, iocb, inode, iter, offset, |
| 1054 | get_data_block); | 1053 | get_data_block); |
| 1055 | } | 1054 | } |
| 1056 | 1055 | ||
| 1057 | static void f2fs_invalidate_data_page(struct page *page, unsigned int offset, | 1056 | static void f2fs_invalidate_data_page(struct page *page, unsigned int offset, |
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 9c49c593d8eb..c58e33075719 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c | |||
| @@ -808,10 +808,10 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | |||
| 808 | 808 | ||
| 809 | const struct file_operations f2fs_file_operations = { | 809 | const struct file_operations f2fs_file_operations = { |
| 810 | .llseek = f2fs_llseek, | 810 | .llseek = f2fs_llseek, |
| 811 | .read = do_sync_read, | 811 | .read = new_sync_read, |
| 812 | .write = do_sync_write, | 812 | .write = new_sync_write, |
| 813 | .aio_read = generic_file_aio_read, | 813 | .read_iter = generic_file_read_iter, |
| 814 | .aio_write = generic_file_aio_write, | 814 | .write_iter = generic_file_write_iter, |
| 815 | .open = generic_file_open, | 815 | .open = generic_file_open, |
| 816 | .mmap = f2fs_file_mmap, | 816 | .mmap = f2fs_file_mmap, |
| 817 | .fsync = f2fs_sync_file, | 817 | .fsync = f2fs_sync_file, |
| @@ -821,5 +821,5 @@ const struct file_operations f2fs_file_operations = { | |||
| 821 | .compat_ioctl = f2fs_compat_ioctl, | 821 | .compat_ioctl = f2fs_compat_ioctl, |
| 822 | #endif | 822 | #endif |
| 823 | .splice_read = generic_file_splice_read, | 823 | .splice_read = generic_file_splice_read, |
| 824 | .splice_write = generic_file_splice_write, | 824 | .splice_write = iter_file_splice_write, |
| 825 | }; | 825 | }; |
diff --git a/fs/fat/file.c b/fs/fat/file.c index 9b104f543056..85f79a89e747 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c | |||
| @@ -170,10 +170,10 @@ int fat_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync) | |||
| 170 | 170 | ||
| 171 | const struct file_operations fat_file_operations = { | 171 | const struct file_operations fat_file_operations = { |
| 172 | .llseek = generic_file_llseek, | 172 | .llseek = generic_file_llseek, |
| 173 | .read = do_sync_read, | 173 | .read = new_sync_read, |
| 174 | .write = do_sync_write, | 174 | .write = new_sync_write, |
| 175 | .aio_read = generic_file_aio_read, | 175 | .read_iter = generic_file_read_iter, |
| 176 | .aio_write = generic_file_aio_write, | 176 | .write_iter = generic_file_write_iter, |
| 177 | .mmap = generic_file_mmap, | 177 | .mmap = generic_file_mmap, |
| 178 | .release = fat_file_release, | 178 | .release = fat_file_release, |
| 179 | .unlocked_ioctl = fat_generic_ioctl, | 179 | .unlocked_ioctl = fat_generic_ioctl, |
diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 9c83594d7fb5..756aead10d96 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c | |||
| @@ -247,12 +247,13 @@ static int fat_write_end(struct file *file, struct address_space *mapping, | |||
| 247 | } | 247 | } |
| 248 | 248 | ||
| 249 | static ssize_t fat_direct_IO(int rw, struct kiocb *iocb, | 249 | static ssize_t fat_direct_IO(int rw, struct kiocb *iocb, |
| 250 | const struct iovec *iov, | 250 | struct iov_iter *iter, |
| 251 | loff_t offset, unsigned long nr_segs) | 251 | loff_t offset) |
| 252 | { | 252 | { |
| 253 | struct file *file = iocb->ki_filp; | 253 | struct file *file = iocb->ki_filp; |
| 254 | struct address_space *mapping = file->f_mapping; | 254 | struct address_space *mapping = file->f_mapping; |
| 255 | struct inode *inode = mapping->host; | 255 | struct inode *inode = mapping->host; |
| 256 | size_t count = iov_iter_count(iter); | ||
| 256 | ssize_t ret; | 257 | ssize_t ret; |
| 257 | 258 | ||
| 258 | if (rw == WRITE) { | 259 | if (rw == WRITE) { |
| @@ -265,7 +266,7 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb, | |||
| 265 | * | 266 | * |
| 266 | * Return 0, and fallback to normal buffered write. | 267 | * Return 0, and fallback to normal buffered write. |
| 267 | */ | 268 | */ |
| 268 | loff_t size = offset + iov_length(iov, nr_segs); | 269 | loff_t size = offset + count; |
| 269 | if (MSDOS_I(inode)->mmu_private < size) | 270 | if (MSDOS_I(inode)->mmu_private < size) |
| 270 | return 0; | 271 | return 0; |
| 271 | } | 272 | } |
| @@ -274,10 +275,9 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb, | |||
| 274 | * FAT need to use the DIO_LOCKING for avoiding the race | 275 | * FAT need to use the DIO_LOCKING for avoiding the race |
| 275 | * condition of fat_get_block() and ->truncate(). | 276 | * condition of fat_get_block() and ->truncate(). |
| 276 | */ | 277 | */ |
| 277 | ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, | 278 | ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, fat_get_block); |
| 278 | fat_get_block); | ||
| 279 | if (ret < 0 && (rw & WRITE)) | 279 | if (ret < 0 && (rw & WRITE)) |
| 280 | fat_write_failed(mapping, offset + iov_length(iov, nr_segs)); | 280 | fat_write_failed(mapping, offset + count); |
| 281 | 281 | ||
| 282 | return ret; | 282 | return ret; |
| 283 | } | 283 | } |
| @@ -44,15 +44,10 @@ static void *alloc_fdmem(size_t size) | |||
| 44 | return vmalloc(size); | 44 | return vmalloc(size); |
| 45 | } | 45 | } |
| 46 | 46 | ||
| 47 | static void free_fdmem(void *ptr) | ||
| 48 | { | ||
| 49 | is_vmalloc_addr(ptr) ? vfree(ptr) : kfree(ptr); | ||
| 50 | } | ||
| 51 | |||
| 52 | static void __free_fdtable(struct fdtable *fdt) | 47 | static void __free_fdtable(struct fdtable *fdt) |
| 53 | { | 48 | { |
| 54 | free_fdmem(fdt->fd); | 49 | kvfree(fdt->fd); |
| 55 | free_fdmem(fdt->open_fds); | 50 | kvfree(fdt->open_fds); |
| 56 | kfree(fdt); | 51 | kfree(fdt); |
| 57 | } | 52 | } |
| 58 | 53 | ||
| @@ -130,7 +125,7 @@ static struct fdtable * alloc_fdtable(unsigned int nr) | |||
| 130 | return fdt; | 125 | return fdt; |
| 131 | 126 | ||
| 132 | out_arr: | 127 | out_arr: |
| 133 | free_fdmem(fdt->fd); | 128 | kvfree(fdt->fd); |
| 134 | out_fdt: | 129 | out_fdt: |
| 135 | kfree(fdt); | 130 | kfree(fdt); |
| 136 | out: | 131 | out: |
diff --git a/fs/file_table.c b/fs/file_table.c index 40bf4660f0a3..385bfd31512a 100644 --- a/fs/file_table.c +++ b/fs/file_table.c | |||
| @@ -175,6 +175,12 @@ struct file *alloc_file(struct path *path, fmode_t mode, | |||
| 175 | file->f_path = *path; | 175 | file->f_path = *path; |
| 176 | file->f_inode = path->dentry->d_inode; | 176 | file->f_inode = path->dentry->d_inode; |
| 177 | file->f_mapping = path->dentry->d_inode->i_mapping; | 177 | file->f_mapping = path->dentry->d_inode->i_mapping; |
| 178 | if ((mode & FMODE_READ) && | ||
| 179 | likely(fop->read || fop->aio_read || fop->read_iter)) | ||
| 180 | mode |= FMODE_CAN_READ; | ||
| 181 | if ((mode & FMODE_WRITE) && | ||
| 182 | likely(fop->write || fop->aio_write || fop->write_iter)) | ||
| 183 | mode |= FMODE_CAN_WRITE; | ||
| 178 | file->f_mode = mode; | 184 | file->f_mode = mode; |
| 179 | file->f_op = fop; | 185 | file->f_op = fop; |
| 180 | if ((mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) | 186 | if ((mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) |
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c index 13b691a8a7d2..966ace8b243f 100644 --- a/fs/fuse/cuse.c +++ b/fs/fuse/cuse.c | |||
| @@ -94,8 +94,10 @@ static ssize_t cuse_read(struct file *file, char __user *buf, size_t count, | |||
| 94 | loff_t pos = 0; | 94 | loff_t pos = 0; |
| 95 | struct iovec iov = { .iov_base = buf, .iov_len = count }; | 95 | struct iovec iov = { .iov_base = buf, .iov_len = count }; |
| 96 | struct fuse_io_priv io = { .async = 0, .file = file }; | 96 | struct fuse_io_priv io = { .async = 0, .file = file }; |
| 97 | struct iov_iter ii; | ||
| 98 | iov_iter_init(&ii, READ, &iov, 1, count); | ||
| 97 | 99 | ||
| 98 | return fuse_direct_io(&io, &iov, 1, count, &pos, FUSE_DIO_CUSE); | 100 | return fuse_direct_io(&io, &ii, &pos, FUSE_DIO_CUSE); |
| 99 | } | 101 | } |
| 100 | 102 | ||
| 101 | static ssize_t cuse_write(struct file *file, const char __user *buf, | 103 | static ssize_t cuse_write(struct file *file, const char __user *buf, |
| @@ -104,12 +106,14 @@ static ssize_t cuse_write(struct file *file, const char __user *buf, | |||
| 104 | loff_t pos = 0; | 106 | loff_t pos = 0; |
| 105 | struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count }; | 107 | struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count }; |
| 106 | struct fuse_io_priv io = { .async = 0, .file = file }; | 108 | struct fuse_io_priv io = { .async = 0, .file = file }; |
| 109 | struct iov_iter ii; | ||
| 110 | iov_iter_init(&ii, WRITE, &iov, 1, count); | ||
| 107 | 111 | ||
| 108 | /* | 112 | /* |
| 109 | * No locking or generic_write_checks(), the server is | 113 | * No locking or generic_write_checks(), the server is |
| 110 | * responsible for locking and sanity checks. | 114 | * responsible for locking and sanity checks. |
| 111 | */ | 115 | */ |
| 112 | return fuse_direct_io(&io, &iov, 1, count, &pos, | 116 | return fuse_direct_io(&io, &ii, &pos, |
| 113 | FUSE_DIO_WRITE | FUSE_DIO_CUSE); | 117 | FUSE_DIO_WRITE | FUSE_DIO_CUSE); |
| 114 | } | 118 | } |
| 115 | 119 | ||
diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 903cbc9cd6bd..6e16dad13e9b 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c | |||
| @@ -933,8 +933,7 @@ out: | |||
| 933 | return err; | 933 | return err; |
| 934 | } | 934 | } |
| 935 | 935 | ||
| 936 | static ssize_t fuse_file_aio_read(struct kiocb *iocb, const struct iovec *iov, | 936 | static ssize_t fuse_file_read_iter(struct kiocb *iocb, struct iov_iter *to) |
| 937 | unsigned long nr_segs, loff_t pos) | ||
| 938 | { | 937 | { |
| 939 | struct inode *inode = iocb->ki_filp->f_mapping->host; | 938 | struct inode *inode = iocb->ki_filp->f_mapping->host; |
| 940 | struct fuse_conn *fc = get_fuse_conn(inode); | 939 | struct fuse_conn *fc = get_fuse_conn(inode); |
| @@ -945,14 +944,14 @@ static ssize_t fuse_file_aio_read(struct kiocb *iocb, const struct iovec *iov, | |||
| 945 | * i_size is up to date). | 944 | * i_size is up to date). |
| 946 | */ | 945 | */ |
| 947 | if (fc->auto_inval_data || | 946 | if (fc->auto_inval_data || |
| 948 | (pos + iov_length(iov, nr_segs) > i_size_read(inode))) { | 947 | (iocb->ki_pos + iov_iter_count(to) > i_size_read(inode))) { |
| 949 | int err; | 948 | int err; |
| 950 | err = fuse_update_attributes(inode, NULL, iocb->ki_filp, NULL); | 949 | err = fuse_update_attributes(inode, NULL, iocb->ki_filp, NULL); |
| 951 | if (err) | 950 | if (err) |
| 952 | return err; | 951 | return err; |
| 953 | } | 952 | } |
| 954 | 953 | ||
| 955 | return generic_file_aio_read(iocb, iov, nr_segs, pos); | 954 | return generic_file_read_iter(iocb, to); |
| 956 | } | 955 | } |
| 957 | 956 | ||
| 958 | static void fuse_write_fill(struct fuse_req *req, struct fuse_file *ff, | 957 | static void fuse_write_fill(struct fuse_req *req, struct fuse_file *ff, |
| @@ -1181,19 +1180,17 @@ static ssize_t fuse_perform_write(struct file *file, | |||
| 1181 | return res > 0 ? res : err; | 1180 | return res > 0 ? res : err; |
| 1182 | } | 1181 | } |
| 1183 | 1182 | ||
| 1184 | static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov, | 1183 | static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from) |
| 1185 | unsigned long nr_segs, loff_t pos) | ||
| 1186 | { | 1184 | { |
| 1187 | struct file *file = iocb->ki_filp; | 1185 | struct file *file = iocb->ki_filp; |
| 1188 | struct address_space *mapping = file->f_mapping; | 1186 | struct address_space *mapping = file->f_mapping; |
| 1189 | size_t count = 0; | 1187 | size_t count = iov_iter_count(from); |
| 1190 | size_t ocount = 0; | ||
| 1191 | ssize_t written = 0; | 1188 | ssize_t written = 0; |
| 1192 | ssize_t written_buffered = 0; | 1189 | ssize_t written_buffered = 0; |
| 1193 | struct inode *inode = mapping->host; | 1190 | struct inode *inode = mapping->host; |
| 1194 | ssize_t err; | 1191 | ssize_t err; |
| 1195 | struct iov_iter i; | ||
| 1196 | loff_t endbyte = 0; | 1192 | loff_t endbyte = 0; |
| 1193 | loff_t pos = iocb->ki_pos; | ||
| 1197 | 1194 | ||
| 1198 | if (get_fuse_conn(inode)->writeback_cache) { | 1195 | if (get_fuse_conn(inode)->writeback_cache) { |
| 1199 | /* Update size (EOF optimization) and mode (SUID clearing) */ | 1196 | /* Update size (EOF optimization) and mode (SUID clearing) */ |
| @@ -1201,17 +1198,9 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
| 1201 | if (err) | 1198 | if (err) |
| 1202 | return err; | 1199 | return err; |
| 1203 | 1200 | ||
| 1204 | return generic_file_aio_write(iocb, iov, nr_segs, pos); | 1201 | return generic_file_write_iter(iocb, from); |
| 1205 | } | 1202 | } |
| 1206 | 1203 | ||
| 1207 | WARN_ON(iocb->ki_pos != pos); | ||
| 1208 | |||
| 1209 | ocount = 0; | ||
| 1210 | err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ); | ||
| 1211 | if (err) | ||
| 1212 | return err; | ||
| 1213 | |||
| 1214 | count = ocount; | ||
| 1215 | mutex_lock(&inode->i_mutex); | 1204 | mutex_lock(&inode->i_mutex); |
| 1216 | 1205 | ||
| 1217 | /* We can write back this queue in page reclaim */ | 1206 | /* We can write back this queue in page reclaim */ |
| @@ -1224,6 +1213,7 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
| 1224 | if (count == 0) | 1213 | if (count == 0) |
| 1225 | goto out; | 1214 | goto out; |
| 1226 | 1215 | ||
| 1216 | iov_iter_truncate(from, count); | ||
| 1227 | err = file_remove_suid(file); | 1217 | err = file_remove_suid(file); |
| 1228 | if (err) | 1218 | if (err) |
| 1229 | goto out; | 1219 | goto out; |
| @@ -1233,16 +1223,13 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
| 1233 | goto out; | 1223 | goto out; |
| 1234 | 1224 | ||
| 1235 | if (file->f_flags & O_DIRECT) { | 1225 | if (file->f_flags & O_DIRECT) { |
| 1236 | written = generic_file_direct_write(iocb, iov, &nr_segs, pos, | 1226 | written = generic_file_direct_write(iocb, from, pos); |
| 1237 | count, ocount); | 1227 | if (written < 0 || !iov_iter_count(from)) |
| 1238 | if (written < 0 || written == count) | ||
| 1239 | goto out; | 1228 | goto out; |
| 1240 | 1229 | ||
| 1241 | pos += written; | 1230 | pos += written; |
| 1242 | count -= written; | ||
| 1243 | 1231 | ||
| 1244 | iov_iter_init(&i, iov, nr_segs, count, written); | 1232 | written_buffered = fuse_perform_write(file, mapping, from, pos); |
| 1245 | written_buffered = fuse_perform_write(file, mapping, &i, pos); | ||
| 1246 | if (written_buffered < 0) { | 1233 | if (written_buffered < 0) { |
| 1247 | err = written_buffered; | 1234 | err = written_buffered; |
| 1248 | goto out; | 1235 | goto out; |
| @@ -1261,8 +1248,7 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
| 1261 | written += written_buffered; | 1248 | written += written_buffered; |
| 1262 | iocb->ki_pos = pos + written_buffered; | 1249 | iocb->ki_pos = pos + written_buffered; |
| 1263 | } else { | 1250 | } else { |
| 1264 | iov_iter_init(&i, iov, nr_segs, count, 0); | 1251 | written = fuse_perform_write(file, mapping, from, pos); |
| 1265 | written = fuse_perform_write(file, mapping, &i, pos); | ||
| 1266 | if (written >= 0) | 1252 | if (written >= 0) |
| 1267 | iocb->ki_pos = pos + written; | 1253 | iocb->ki_pos = pos + written; |
| 1268 | } | 1254 | } |
| @@ -1300,7 +1286,7 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii, | |||
| 1300 | size_t nbytes = 0; /* # bytes already packed in req */ | 1286 | size_t nbytes = 0; /* # bytes already packed in req */ |
| 1301 | 1287 | ||
| 1302 | /* Special case for kernel I/O: can copy directly into the buffer */ | 1288 | /* Special case for kernel I/O: can copy directly into the buffer */ |
| 1303 | if (segment_eq(get_fs(), KERNEL_DS)) { | 1289 | if (ii->type & ITER_KVEC) { |
| 1304 | unsigned long user_addr = fuse_get_user_addr(ii); | 1290 | unsigned long user_addr = fuse_get_user_addr(ii); |
| 1305 | size_t frag_size = fuse_get_frag_size(ii, *nbytesp); | 1291 | size_t frag_size = fuse_get_frag_size(ii, *nbytesp); |
| 1306 | 1292 | ||
| @@ -1316,35 +1302,26 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii, | |||
| 1316 | 1302 | ||
| 1317 | while (nbytes < *nbytesp && req->num_pages < req->max_pages) { | 1303 | while (nbytes < *nbytesp && req->num_pages < req->max_pages) { |
| 1318 | unsigned npages; | 1304 | unsigned npages; |
| 1319 | unsigned long user_addr = fuse_get_user_addr(ii); | 1305 | size_t start; |
| 1320 | unsigned offset = user_addr & ~PAGE_MASK; | ||
| 1321 | size_t frag_size = fuse_get_frag_size(ii, *nbytesp - nbytes); | ||
| 1322 | int ret; | ||
| 1323 | |||
| 1324 | unsigned n = req->max_pages - req->num_pages; | 1306 | unsigned n = req->max_pages - req->num_pages; |
| 1325 | frag_size = min_t(size_t, frag_size, n << PAGE_SHIFT); | 1307 | ssize_t ret = iov_iter_get_pages(ii, |
| 1326 | 1308 | &req->pages[req->num_pages], | |
| 1327 | npages = (frag_size + offset + PAGE_SIZE - 1) >> PAGE_SHIFT; | 1309 | n * PAGE_SIZE, &start); |
| 1328 | npages = clamp(npages, 1U, n); | ||
| 1329 | |||
| 1330 | ret = get_user_pages_fast(user_addr, npages, !write, | ||
| 1331 | &req->pages[req->num_pages]); | ||
| 1332 | if (ret < 0) | 1310 | if (ret < 0) |
| 1333 | return ret; | 1311 | return ret; |
| 1334 | 1312 | ||
| 1335 | npages = ret; | 1313 | iov_iter_advance(ii, ret); |
| 1336 | frag_size = min_t(size_t, frag_size, | 1314 | nbytes += ret; |
| 1337 | (npages << PAGE_SHIFT) - offset); | 1315 | |
| 1338 | iov_iter_advance(ii, frag_size); | 1316 | ret += start; |
| 1317 | npages = (ret + PAGE_SIZE - 1) / PAGE_SIZE; | ||
| 1339 | 1318 | ||
| 1340 | req->page_descs[req->num_pages].offset = offset; | 1319 | req->page_descs[req->num_pages].offset = start; |
| 1341 | fuse_page_descs_length_init(req, req->num_pages, npages); | 1320 | fuse_page_descs_length_init(req, req->num_pages, npages); |
| 1342 | 1321 | ||
| 1343 | req->num_pages += npages; | 1322 | req->num_pages += npages; |
| 1344 | req->page_descs[req->num_pages - 1].length -= | 1323 | req->page_descs[req->num_pages - 1].length -= |
| 1345 | (npages << PAGE_SHIFT) - offset - frag_size; | 1324 | (PAGE_SIZE - ret) & (PAGE_SIZE - 1); |
| 1346 | |||
| 1347 | nbytes += frag_size; | ||
| 1348 | } | 1325 | } |
| 1349 | 1326 | ||
| 1350 | if (write) | 1327 | if (write) |
| @@ -1359,24 +1336,11 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii, | |||
| 1359 | 1336 | ||
| 1360 | static inline int fuse_iter_npages(const struct iov_iter *ii_p) | 1337 | static inline int fuse_iter_npages(const struct iov_iter *ii_p) |
| 1361 | { | 1338 | { |
| 1362 | struct iov_iter ii = *ii_p; | 1339 | return iov_iter_npages(ii_p, FUSE_MAX_PAGES_PER_REQ); |
| 1363 | int npages = 0; | ||
| 1364 | |||
| 1365 | while (iov_iter_count(&ii) && npages < FUSE_MAX_PAGES_PER_REQ) { | ||
| 1366 | unsigned long user_addr = fuse_get_user_addr(&ii); | ||
| 1367 | unsigned offset = user_addr & ~PAGE_MASK; | ||
| 1368 | size_t frag_size = iov_iter_single_seg_count(&ii); | ||
| 1369 | |||
| 1370 | npages += (frag_size + offset + PAGE_SIZE - 1) >> PAGE_SHIFT; | ||
| 1371 | iov_iter_advance(&ii, frag_size); | ||
| 1372 | } | ||
| 1373 | |||
| 1374 | return min(npages, FUSE_MAX_PAGES_PER_REQ); | ||
| 1375 | } | 1340 | } |
| 1376 | 1341 | ||
| 1377 | ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov, | 1342 | ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter, |
| 1378 | unsigned long nr_segs, size_t count, loff_t *ppos, | 1343 | loff_t *ppos, int flags) |
| 1379 | int flags) | ||
| 1380 | { | 1344 | { |
| 1381 | int write = flags & FUSE_DIO_WRITE; | 1345 | int write = flags & FUSE_DIO_WRITE; |
| 1382 | int cuse = flags & FUSE_DIO_CUSE; | 1346 | int cuse = flags & FUSE_DIO_CUSE; |
| @@ -1386,18 +1350,16 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov, | |||
| 1386 | struct fuse_conn *fc = ff->fc; | 1350 | struct fuse_conn *fc = ff->fc; |
| 1387 | size_t nmax = write ? fc->max_write : fc->max_read; | 1351 | size_t nmax = write ? fc->max_write : fc->max_read; |
| 1388 | loff_t pos = *ppos; | 1352 | loff_t pos = *ppos; |
| 1353 | size_t count = iov_iter_count(iter); | ||
| 1389 | pgoff_t idx_from = pos >> PAGE_CACHE_SHIFT; | 1354 | pgoff_t idx_from = pos >> PAGE_CACHE_SHIFT; |
| 1390 | pgoff_t idx_to = (pos + count - 1) >> PAGE_CACHE_SHIFT; | 1355 | pgoff_t idx_to = (pos + count - 1) >> PAGE_CACHE_SHIFT; |
| 1391 | ssize_t res = 0; | 1356 | ssize_t res = 0; |
| 1392 | struct fuse_req *req; | 1357 | struct fuse_req *req; |
| 1393 | struct iov_iter ii; | ||
| 1394 | |||
| 1395 | iov_iter_init(&ii, iov, nr_segs, count, 0); | ||
| 1396 | 1358 | ||
| 1397 | if (io->async) | 1359 | if (io->async) |
| 1398 | req = fuse_get_req_for_background(fc, fuse_iter_npages(&ii)); | 1360 | req = fuse_get_req_for_background(fc, fuse_iter_npages(iter)); |
| 1399 | else | 1361 | else |
| 1400 | req = fuse_get_req(fc, fuse_iter_npages(&ii)); | 1362 | req = fuse_get_req(fc, fuse_iter_npages(iter)); |
| 1401 | if (IS_ERR(req)) | 1363 | if (IS_ERR(req)) |
| 1402 | return PTR_ERR(req); | 1364 | return PTR_ERR(req); |
| 1403 | 1365 | ||
| @@ -1413,7 +1375,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov, | |||
| 1413 | size_t nres; | 1375 | size_t nres; |
| 1414 | fl_owner_t owner = current->files; | 1376 | fl_owner_t owner = current->files; |
| 1415 | size_t nbytes = min(count, nmax); | 1377 | size_t nbytes = min(count, nmax); |
| 1416 | int err = fuse_get_user_pages(req, &ii, &nbytes, write); | 1378 | int err = fuse_get_user_pages(req, iter, &nbytes, write); |
| 1417 | if (err) { | 1379 | if (err) { |
| 1418 | res = err; | 1380 | res = err; |
| 1419 | break; | 1381 | break; |
| @@ -1443,9 +1405,9 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov, | |||
| 1443 | fuse_put_request(fc, req); | 1405 | fuse_put_request(fc, req); |
| 1444 | if (io->async) | 1406 | if (io->async) |
| 1445 | req = fuse_get_req_for_background(fc, | 1407 | req = fuse_get_req_for_background(fc, |
| 1446 | fuse_iter_npages(&ii)); | 1408 | fuse_iter_npages(iter)); |
| 1447 | else | 1409 | else |
| 1448 | req = fuse_get_req(fc, fuse_iter_npages(&ii)); | 1410 | req = fuse_get_req(fc, fuse_iter_npages(iter)); |
| 1449 | if (IS_ERR(req)) | 1411 | if (IS_ERR(req)) |
| 1450 | break; | 1412 | break; |
| 1451 | } | 1413 | } |
| @@ -1460,9 +1422,8 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov, | |||
| 1460 | EXPORT_SYMBOL_GPL(fuse_direct_io); | 1422 | EXPORT_SYMBOL_GPL(fuse_direct_io); |
| 1461 | 1423 | ||
| 1462 | static ssize_t __fuse_direct_read(struct fuse_io_priv *io, | 1424 | static ssize_t __fuse_direct_read(struct fuse_io_priv *io, |
| 1463 | const struct iovec *iov, | 1425 | struct iov_iter *iter, |
| 1464 | unsigned long nr_segs, loff_t *ppos, | 1426 | loff_t *ppos) |
| 1465 | size_t count) | ||
| 1466 | { | 1427 | { |
| 1467 | ssize_t res; | 1428 | ssize_t res; |
| 1468 | struct file *file = io->file; | 1429 | struct file *file = io->file; |
| @@ -1471,7 +1432,7 @@ static ssize_t __fuse_direct_read(struct fuse_io_priv *io, | |||
| 1471 | if (is_bad_inode(inode)) | 1432 | if (is_bad_inode(inode)) |
| 1472 | return -EIO; | 1433 | return -EIO; |
| 1473 | 1434 | ||
| 1474 | res = fuse_direct_io(io, iov, nr_segs, count, ppos, 0); | 1435 | res = fuse_direct_io(io, iter, ppos, 0); |
| 1475 | 1436 | ||
| 1476 | fuse_invalidate_attr(inode); | 1437 | fuse_invalidate_attr(inode); |
| 1477 | 1438 | ||
| @@ -1483,22 +1444,26 @@ static ssize_t fuse_direct_read(struct file *file, char __user *buf, | |||
| 1483 | { | 1444 | { |
| 1484 | struct fuse_io_priv io = { .async = 0, .file = file }; | 1445 | struct fuse_io_priv io = { .async = 0, .file = file }; |
| 1485 | struct iovec iov = { .iov_base = buf, .iov_len = count }; | 1446 | struct iovec iov = { .iov_base = buf, .iov_len = count }; |
| 1486 | return __fuse_direct_read(&io, &iov, 1, ppos, count); | 1447 | struct iov_iter ii; |
| 1448 | iov_iter_init(&ii, READ, &iov, 1, count); | ||
| 1449 | return __fuse_direct_read(&io, &ii, ppos); | ||
| 1487 | } | 1450 | } |
| 1488 | 1451 | ||
| 1489 | static ssize_t __fuse_direct_write(struct fuse_io_priv *io, | 1452 | static ssize_t __fuse_direct_write(struct fuse_io_priv *io, |
| 1490 | const struct iovec *iov, | 1453 | struct iov_iter *iter, |
| 1491 | unsigned long nr_segs, loff_t *ppos) | 1454 | loff_t *ppos) |
| 1492 | { | 1455 | { |
| 1493 | struct file *file = io->file; | 1456 | struct file *file = io->file; |
| 1494 | struct inode *inode = file_inode(file); | 1457 | struct inode *inode = file_inode(file); |
| 1495 | size_t count = iov_length(iov, nr_segs); | 1458 | size_t count = iov_iter_count(iter); |
| 1496 | ssize_t res; | 1459 | ssize_t res; |
| 1497 | 1460 | ||
| 1461 | |||
| 1498 | res = generic_write_checks(file, ppos, &count, 0); | 1462 | res = generic_write_checks(file, ppos, &count, 0); |
| 1499 | if (!res) | 1463 | if (!res) { |
| 1500 | res = fuse_direct_io(io, iov, nr_segs, count, ppos, | 1464 | iov_iter_truncate(iter, count); |
| 1501 | FUSE_DIO_WRITE); | 1465 | res = fuse_direct_io(io, iter, ppos, FUSE_DIO_WRITE); |
| 1466 | } | ||
| 1502 | 1467 | ||
| 1503 | fuse_invalidate_attr(inode); | 1468 | fuse_invalidate_attr(inode); |
| 1504 | 1469 | ||
| @@ -1512,13 +1477,15 @@ static ssize_t fuse_direct_write(struct file *file, const char __user *buf, | |||
| 1512 | struct inode *inode = file_inode(file); | 1477 | struct inode *inode = file_inode(file); |
| 1513 | ssize_t res; | 1478 | ssize_t res; |
| 1514 | struct fuse_io_priv io = { .async = 0, .file = file }; | 1479 | struct fuse_io_priv io = { .async = 0, .file = file }; |
| 1480 | struct iov_iter ii; | ||
| 1481 | iov_iter_init(&ii, WRITE, &iov, 1, count); | ||
| 1515 | 1482 | ||
| 1516 | if (is_bad_inode(inode)) | 1483 | if (is_bad_inode(inode)) |
| 1517 | return -EIO; | 1484 | return -EIO; |
| 1518 | 1485 | ||
| 1519 | /* Don't allow parallel writes to the same file */ | 1486 | /* Don't allow parallel writes to the same file */ |
| 1520 | mutex_lock(&inode->i_mutex); | 1487 | mutex_lock(&inode->i_mutex); |
| 1521 | res = __fuse_direct_write(&io, &iov, 1, ppos); | 1488 | res = __fuse_direct_write(&io, &ii, ppos); |
| 1522 | if (res > 0) | 1489 | if (res > 0) |
| 1523 | fuse_write_update_size(inode, *ppos); | 1490 | fuse_write_update_size(inode, *ppos); |
| 1524 | mutex_unlock(&inode->i_mutex); | 1491 | mutex_unlock(&inode->i_mutex); |
| @@ -2372,7 +2339,7 @@ static int fuse_ioctl_copy_user(struct page **pages, struct iovec *iov, | |||
| 2372 | if (!bytes) | 2339 | if (!bytes) |
| 2373 | return 0; | 2340 | return 0; |
| 2374 | 2341 | ||
| 2375 | iov_iter_init(&ii, iov, nr_segs, bytes, 0); | 2342 | iov_iter_init(&ii, to_user ? READ : WRITE, iov, nr_segs, bytes); |
| 2376 | 2343 | ||
| 2377 | while (iov_iter_count(&ii)) { | 2344 | while (iov_iter_count(&ii)) { |
| 2378 | struct page *page = pages[page_idx++]; | 2345 | struct page *page = pages[page_idx++]; |
| @@ -2894,8 +2861,8 @@ static inline loff_t fuse_round_up(loff_t off) | |||
| 2894 | } | 2861 | } |
| 2895 | 2862 | ||
| 2896 | static ssize_t | 2863 | static ssize_t |
| 2897 | fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, | 2864 | fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, |
| 2898 | loff_t offset, unsigned long nr_segs) | 2865 | loff_t offset) |
| 2899 | { | 2866 | { |
| 2900 | ssize_t ret = 0; | 2867 | ssize_t ret = 0; |
| 2901 | struct file *file = iocb->ki_filp; | 2868 | struct file *file = iocb->ki_filp; |
| @@ -2904,7 +2871,7 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, | |||
| 2904 | loff_t pos = 0; | 2871 | loff_t pos = 0; |
| 2905 | struct inode *inode; | 2872 | struct inode *inode; |
| 2906 | loff_t i_size; | 2873 | loff_t i_size; |
| 2907 | size_t count = iov_length(iov, nr_segs); | 2874 | size_t count = iov_iter_count(iter); |
| 2908 | struct fuse_io_priv *io; | 2875 | struct fuse_io_priv *io; |
| 2909 | 2876 | ||
| 2910 | pos = offset; | 2877 | pos = offset; |
| @@ -2919,6 +2886,7 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, | |||
| 2919 | if (offset >= i_size) | 2886 | if (offset >= i_size) |
| 2920 | return 0; | 2887 | return 0; |
| 2921 | count = min_t(loff_t, count, fuse_round_up(i_size - offset)); | 2888 | count = min_t(loff_t, count, fuse_round_up(i_size - offset)); |
| 2889 | iov_iter_truncate(iter, count); | ||
| 2922 | } | 2890 | } |
| 2923 | 2891 | ||
| 2924 | io = kmalloc(sizeof(struct fuse_io_priv), GFP_KERNEL); | 2892 | io = kmalloc(sizeof(struct fuse_io_priv), GFP_KERNEL); |
| @@ -2948,9 +2916,9 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, | |||
| 2948 | io->async = false; | 2916 | io->async = false; |
| 2949 | 2917 | ||
| 2950 | if (rw == WRITE) | 2918 | if (rw == WRITE) |
| 2951 | ret = __fuse_direct_write(io, iov, nr_segs, &pos); | 2919 | ret = __fuse_direct_write(io, iter, &pos); |
| 2952 | else | 2920 | else |
| 2953 | ret = __fuse_direct_read(io, iov, nr_segs, &pos, count); | 2921 | ret = __fuse_direct_read(io, iter, &pos); |
| 2954 | 2922 | ||
| 2955 | if (io->async) { | 2923 | if (io->async) { |
| 2956 | fuse_aio_complete(io, ret < 0 ? ret : 0, -1); | 2924 | fuse_aio_complete(io, ret < 0 ? ret : 0, -1); |
| @@ -3061,10 +3029,10 @@ out: | |||
| 3061 | 3029 | ||
| 3062 | static const struct file_operations fuse_file_operations = { | 3030 | static const struct file_operations fuse_file_operations = { |
| 3063 | .llseek = fuse_file_llseek, | 3031 | .llseek = fuse_file_llseek, |
| 3064 | .read = do_sync_read, | 3032 | .read = new_sync_read, |
| 3065 | .aio_read = fuse_file_aio_read, | 3033 | .read_iter = fuse_file_read_iter, |
| 3066 | .write = do_sync_write, | 3034 | .write = new_sync_write, |
| 3067 | .aio_write = fuse_file_aio_write, | 3035 | .write_iter = fuse_file_write_iter, |
| 3068 | .mmap = fuse_file_mmap, | 3036 | .mmap = fuse_file_mmap, |
| 3069 | .open = fuse_open, | 3037 | .open = fuse_open, |
| 3070 | .flush = fuse_flush, | 3038 | .flush = fuse_flush, |
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 7aa5c75e0de1..e8e47a6ab518 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h | |||
| @@ -880,9 +880,8 @@ int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, | |||
| 880 | /** CUSE pass fuse_direct_io() a file which f_mapping->host is not from FUSE */ | 880 | /** CUSE pass fuse_direct_io() a file which f_mapping->host is not from FUSE */ |
| 881 | #define FUSE_DIO_CUSE (1 << 1) | 881 | #define FUSE_DIO_CUSE (1 << 1) |
| 882 | 882 | ||
| 883 | ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov, | 883 | ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter, |
| 884 | unsigned long nr_segs, size_t count, loff_t *ppos, | 884 | loff_t *ppos, int flags); |
| 885 | int flags); | ||
| 886 | long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, | 885 | long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, |
| 887 | unsigned int flags); | 886 | unsigned int flags); |
| 888 | long fuse_ioctl_common(struct file *file, unsigned int cmd, | 887 | long fuse_ioctl_common(struct file *file, unsigned int cmd, |
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 492123cda64a..805b37fed638 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c | |||
| @@ -1040,8 +1040,7 @@ static int gfs2_ok_for_dio(struct gfs2_inode *ip, int rw, loff_t offset) | |||
| 1040 | 1040 | ||
| 1041 | 1041 | ||
| 1042 | static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, | 1042 | static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, |
| 1043 | const struct iovec *iov, loff_t offset, | 1043 | struct iov_iter *iter, loff_t offset) |
| 1044 | unsigned long nr_segs) | ||
| 1045 | { | 1044 | { |
| 1046 | struct file *file = iocb->ki_filp; | 1045 | struct file *file = iocb->ki_filp; |
| 1047 | struct inode *inode = file->f_mapping->host; | 1046 | struct inode *inode = file->f_mapping->host; |
| @@ -1081,7 +1080,7 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, | |||
| 1081 | */ | 1080 | */ |
| 1082 | if (mapping->nrpages) { | 1081 | if (mapping->nrpages) { |
| 1083 | loff_t lstart = offset & (PAGE_CACHE_SIZE - 1); | 1082 | loff_t lstart = offset & (PAGE_CACHE_SIZE - 1); |
| 1084 | loff_t len = iov_length(iov, nr_segs); | 1083 | loff_t len = iov_iter_count(iter); |
| 1085 | loff_t end = PAGE_ALIGN(offset + len) - 1; | 1084 | loff_t end = PAGE_ALIGN(offset + len) - 1; |
| 1086 | 1085 | ||
| 1087 | rv = 0; | 1086 | rv = 0; |
| @@ -1096,9 +1095,9 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, | |||
| 1096 | truncate_inode_pages_range(mapping, lstart, end); | 1095 | truncate_inode_pages_range(mapping, lstart, end); |
| 1097 | } | 1096 | } |
| 1098 | 1097 | ||
| 1099 | rv = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, | 1098 | rv = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, |
| 1100 | offset, nr_segs, gfs2_get_block_direct, | 1099 | iter, offset, |
| 1101 | NULL, NULL, 0); | 1100 | gfs2_get_block_direct, NULL, NULL, 0); |
| 1102 | out: | 1101 | out: |
| 1103 | gfs2_glock_dq(&gh); | 1102 | gfs2_glock_dq(&gh); |
| 1104 | gfs2_holder_uninit(&gh); | 1103 | gfs2_holder_uninit(&gh); |
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 6ab0cfb2e891..4fc3a3046174 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c | |||
| @@ -684,7 +684,7 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end, | |||
| 684 | } | 684 | } |
| 685 | 685 | ||
| 686 | /** | 686 | /** |
| 687 | * gfs2_file_aio_write - Perform a write to a file | 687 | * gfs2_file_write_iter - Perform a write to a file |
| 688 | * @iocb: The io context | 688 | * @iocb: The io context |
| 689 | * @iov: The data to write | 689 | * @iov: The data to write |
| 690 | * @nr_segs: Number of @iov segments | 690 | * @nr_segs: Number of @iov segments |
| @@ -697,11 +697,9 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end, | |||
| 697 | * | 697 | * |
| 698 | */ | 698 | */ |
| 699 | 699 | ||
| 700 | static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov, | 700 | static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from) |
| 701 | unsigned long nr_segs, loff_t pos) | ||
| 702 | { | 701 | { |
| 703 | struct file *file = iocb->ki_filp; | 702 | struct file *file = iocb->ki_filp; |
| 704 | size_t writesize = iov_length(iov, nr_segs); | ||
| 705 | struct gfs2_inode *ip = GFS2_I(file_inode(file)); | 703 | struct gfs2_inode *ip = GFS2_I(file_inode(file)); |
| 706 | int ret; | 704 | int ret; |
| 707 | 705 | ||
| @@ -709,7 +707,7 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
| 709 | if (ret) | 707 | if (ret) |
| 710 | return ret; | 708 | return ret; |
| 711 | 709 | ||
| 712 | gfs2_size_hint(file, pos, writesize); | 710 | gfs2_size_hint(file, iocb->ki_pos, iov_iter_count(from)); |
| 713 | 711 | ||
| 714 | if (file->f_flags & O_APPEND) { | 712 | if (file->f_flags & O_APPEND) { |
| 715 | struct gfs2_holder gh; | 713 | struct gfs2_holder gh; |
| @@ -720,7 +718,7 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
| 720 | gfs2_glock_dq_uninit(&gh); | 718 | gfs2_glock_dq_uninit(&gh); |
| 721 | } | 719 | } |
| 722 | 720 | ||
| 723 | return generic_file_aio_write(iocb, iov, nr_segs, pos); | 721 | return generic_file_write_iter(iocb, from); |
| 724 | } | 722 | } |
| 725 | 723 | ||
| 726 | static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len, | 724 | static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len, |
| @@ -1058,10 +1056,10 @@ static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl) | |||
| 1058 | 1056 | ||
| 1059 | const struct file_operations gfs2_file_fops = { | 1057 | const struct file_operations gfs2_file_fops = { |
| 1060 | .llseek = gfs2_llseek, | 1058 | .llseek = gfs2_llseek, |
| 1061 | .read = do_sync_read, | 1059 | .read = new_sync_read, |
| 1062 | .aio_read = generic_file_aio_read, | 1060 | .read_iter = generic_file_read_iter, |
| 1063 | .write = do_sync_write, | 1061 | .write = new_sync_write, |
| 1064 | .aio_write = gfs2_file_aio_write, | 1062 | .write_iter = gfs2_file_write_iter, |
| 1065 | .unlocked_ioctl = gfs2_ioctl, | 1063 | .unlocked_ioctl = gfs2_ioctl, |
| 1066 | .mmap = gfs2_mmap, | 1064 | .mmap = gfs2_mmap, |
| 1067 | .open = gfs2_open, | 1065 | .open = gfs2_open, |
| @@ -1070,7 +1068,7 @@ const struct file_operations gfs2_file_fops = { | |||
| 1070 | .lock = gfs2_lock, | 1068 | .lock = gfs2_lock, |
| 1071 | .flock = gfs2_flock, | 1069 | .flock = gfs2_flock, |
| 1072 | .splice_read = generic_file_splice_read, | 1070 | .splice_read = generic_file_splice_read, |
| 1073 | .splice_write = generic_file_splice_write, | 1071 | .splice_write = iter_file_splice_write, |
| 1074 | .setlease = gfs2_setlease, | 1072 | .setlease = gfs2_setlease, |
| 1075 | .fallocate = gfs2_fallocate, | 1073 | .fallocate = gfs2_fallocate, |
| 1076 | }; | 1074 | }; |
| @@ -1090,17 +1088,17 @@ const struct file_operations gfs2_dir_fops = { | |||
| 1090 | 1088 | ||
| 1091 | const struct file_operations gfs2_file_fops_nolock = { | 1089 | const struct file_operations gfs2_file_fops_nolock = { |
| 1092 | .llseek = gfs2_llseek, | 1090 | .llseek = gfs2_llseek, |
| 1093 | .read = do_sync_read, | 1091 | .read = new_sync_read, |
| 1094 | .aio_read = generic_file_aio_read, | 1092 | .read_iter = generic_file_read_iter, |
| 1095 | .write = do_sync_write, | 1093 | .write = new_sync_write, |
| 1096 | .aio_write = gfs2_file_aio_write, | 1094 | .write_iter = gfs2_file_write_iter, |
| 1097 | .unlocked_ioctl = gfs2_ioctl, | 1095 | .unlocked_ioctl = gfs2_ioctl, |
| 1098 | .mmap = gfs2_mmap, | 1096 | .mmap = gfs2_mmap, |
| 1099 | .open = gfs2_open, | 1097 | .open = gfs2_open, |
| 1100 | .release = gfs2_release, | 1098 | .release = gfs2_release, |
| 1101 | .fsync = gfs2_fsync, | 1099 | .fsync = gfs2_fsync, |
| 1102 | .splice_read = generic_file_splice_read, | 1100 | .splice_read = generic_file_splice_read, |
| 1103 | .splice_write = generic_file_splice_write, | 1101 | .splice_write = iter_file_splice_write, |
| 1104 | .setlease = generic_setlease, | 1102 | .setlease = generic_setlease, |
| 1105 | .fallocate = gfs2_fallocate, | 1103 | .fallocate = gfs2_fallocate, |
| 1106 | }; | 1104 | }; |
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index 9e2fecd62f62..d0929bc81782 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c | |||
| @@ -125,15 +125,15 @@ static int hfs_releasepage(struct page *page, gfp_t mask) | |||
| 125 | } | 125 | } |
| 126 | 126 | ||
| 127 | static ssize_t hfs_direct_IO(int rw, struct kiocb *iocb, | 127 | static ssize_t hfs_direct_IO(int rw, struct kiocb *iocb, |
| 128 | const struct iovec *iov, loff_t offset, unsigned long nr_segs) | 128 | struct iov_iter *iter, loff_t offset) |
| 129 | { | 129 | { |
| 130 | struct file *file = iocb->ki_filp; | 130 | struct file *file = iocb->ki_filp; |
| 131 | struct address_space *mapping = file->f_mapping; | 131 | struct address_space *mapping = file->f_mapping; |
| 132 | struct inode *inode = file_inode(file)->i_mapping->host; | 132 | struct inode *inode = file_inode(file)->i_mapping->host; |
| 133 | size_t count = iov_iter_count(iter); | ||
| 133 | ssize_t ret; | 134 | ssize_t ret; |
| 134 | 135 | ||
| 135 | ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, | 136 | ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, hfs_get_block); |
| 136 | hfs_get_block); | ||
| 137 | 137 | ||
| 138 | /* | 138 | /* |
| 139 | * In case of error extending write may have instantiated a few | 139 | * In case of error extending write may have instantiated a few |
| @@ -141,7 +141,7 @@ static ssize_t hfs_direct_IO(int rw, struct kiocb *iocb, | |||
| 141 | */ | 141 | */ |
| 142 | if (unlikely((rw & WRITE) && ret < 0)) { | 142 | if (unlikely((rw & WRITE) && ret < 0)) { |
| 143 | loff_t isize = i_size_read(inode); | 143 | loff_t isize = i_size_read(inode); |
| 144 | loff_t end = offset + iov_length(iov, nr_segs); | 144 | loff_t end = offset + count; |
| 145 | 145 | ||
| 146 | if (end > isize) | 146 | if (end > isize) |
| 147 | hfs_write_failed(mapping, end); | 147 | hfs_write_failed(mapping, end); |
| @@ -674,10 +674,10 @@ static int hfs_file_fsync(struct file *filp, loff_t start, loff_t end, | |||
| 674 | 674 | ||
| 675 | static const struct file_operations hfs_file_operations = { | 675 | static const struct file_operations hfs_file_operations = { |
| 676 | .llseek = generic_file_llseek, | 676 | .llseek = generic_file_llseek, |
| 677 | .read = do_sync_read, | 677 | .read = new_sync_read, |
| 678 | .aio_read = generic_file_aio_read, | 678 | .read_iter = generic_file_read_iter, |
| 679 | .write = do_sync_write, | 679 | .write = new_sync_write, |
| 680 | .aio_write = generic_file_aio_write, | 680 | .write_iter = generic_file_write_iter, |
| 681 | .mmap = generic_file_mmap, | 681 | .mmap = generic_file_mmap, |
| 682 | .splice_read = generic_file_splice_read, | 682 | .splice_read = generic_file_splice_read, |
| 683 | .fsync = hfs_file_fsync, | 683 | .fsync = hfs_file_fsync, |
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index a4f45bd88a63..0cf786f2d046 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c | |||
| @@ -123,14 +123,15 @@ static int hfsplus_releasepage(struct page *page, gfp_t mask) | |||
| 123 | } | 123 | } |
| 124 | 124 | ||
| 125 | static ssize_t hfsplus_direct_IO(int rw, struct kiocb *iocb, | 125 | static ssize_t hfsplus_direct_IO(int rw, struct kiocb *iocb, |
| 126 | const struct iovec *iov, loff_t offset, unsigned long nr_segs) | 126 | struct iov_iter *iter, loff_t offset) |
| 127 | { | 127 | { |
| 128 | struct file *file = iocb->ki_filp; | 128 | struct file *file = iocb->ki_filp; |
| 129 | struct address_space *mapping = file->f_mapping; | 129 | struct address_space *mapping = file->f_mapping; |
| 130 | struct inode *inode = file_inode(file)->i_mapping->host; | 130 | struct inode *inode = file_inode(file)->i_mapping->host; |
| 131 | size_t count = iov_iter_count(iter); | ||
| 131 | ssize_t ret; | 132 | ssize_t ret; |
| 132 | 133 | ||
| 133 | ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, | 134 | ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, |
| 134 | hfsplus_get_block); | 135 | hfsplus_get_block); |
| 135 | 136 | ||
| 136 | /* | 137 | /* |
| @@ -139,7 +140,7 @@ static ssize_t hfsplus_direct_IO(int rw, struct kiocb *iocb, | |||
| 139 | */ | 140 | */ |
| 140 | if (unlikely((rw & WRITE) && ret < 0)) { | 141 | if (unlikely((rw & WRITE) && ret < 0)) { |
| 141 | loff_t isize = i_size_read(inode); | 142 | loff_t isize = i_size_read(inode); |
| 142 | loff_t end = offset + iov_length(iov, nr_segs); | 143 | loff_t end = offset + count; |
| 143 | 144 | ||
| 144 | if (end > isize) | 145 | if (end > isize) |
| 145 | hfsplus_write_failed(mapping, end); | 146 | hfsplus_write_failed(mapping, end); |
| @@ -340,10 +341,10 @@ static const struct inode_operations hfsplus_file_inode_operations = { | |||
| 340 | 341 | ||
| 341 | static const struct file_operations hfsplus_file_operations = { | 342 | static const struct file_operations hfsplus_file_operations = { |
| 342 | .llseek = generic_file_llseek, | 343 | .llseek = generic_file_llseek, |
| 343 | .read = do_sync_read, | 344 | .read = new_sync_read, |
| 344 | .aio_read = generic_file_aio_read, | 345 | .read_iter = generic_file_read_iter, |
| 345 | .write = do_sync_write, | 346 | .write = new_sync_write, |
| 346 | .aio_write = generic_file_aio_write, | 347 | .write_iter = generic_file_write_iter, |
| 347 | .mmap = generic_file_mmap, | 348 | .mmap = generic_file_mmap, |
| 348 | .splice_read = generic_file_splice_read, | 349 | .splice_read = generic_file_splice_read, |
| 349 | .fsync = hfsplus_file_fsync, | 350 | .fsync = hfsplus_file_fsync, |
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 9c470fde9878..bb529f3b7f2b 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c | |||
| @@ -378,11 +378,11 @@ static int hostfs_fsync(struct file *file, loff_t start, loff_t end, | |||
| 378 | 378 | ||
| 379 | static const struct file_operations hostfs_file_fops = { | 379 | static const struct file_operations hostfs_file_fops = { |
| 380 | .llseek = generic_file_llseek, | 380 | .llseek = generic_file_llseek, |
| 381 | .read = do_sync_read, | 381 | .read = new_sync_read, |
| 382 | .splice_read = generic_file_splice_read, | 382 | .splice_read = generic_file_splice_read, |
| 383 | .aio_read = generic_file_aio_read, | 383 | .read_iter = generic_file_read_iter, |
| 384 | .aio_write = generic_file_aio_write, | 384 | .write_iter = generic_file_write_iter, |
| 385 | .write = do_sync_write, | 385 | .write = new_sync_write, |
| 386 | .mmap = generic_file_mmap, | 386 | .mmap = generic_file_mmap, |
| 387 | .open = hostfs_file_open, | 387 | .open = hostfs_file_open, |
| 388 | .release = hostfs_file_release, | 388 | .release = hostfs_file_release, |
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c index 67c1a61e0955..7f54e5f76cec 100644 --- a/fs/hpfs/file.c +++ b/fs/hpfs/file.c | |||
| @@ -197,10 +197,10 @@ const struct address_space_operations hpfs_aops = { | |||
| 197 | const struct file_operations hpfs_file_ops = | 197 | const struct file_operations hpfs_file_ops = |
| 198 | { | 198 | { |
| 199 | .llseek = generic_file_llseek, | 199 | .llseek = generic_file_llseek, |
| 200 | .read = do_sync_read, | 200 | .read = new_sync_read, |
| 201 | .aio_read = generic_file_aio_read, | 201 | .read_iter = generic_file_read_iter, |
| 202 | .write = do_sync_write, | 202 | .write = new_sync_write, |
| 203 | .aio_write = generic_file_aio_write, | 203 | .write_iter = generic_file_write_iter, |
| 204 | .mmap = generic_file_mmap, | 204 | .mmap = generic_file_mmap, |
| 205 | .release = hpfs_file_release, | 205 | .release = hpfs_file_release, |
| 206 | .fsync = hpfs_file_fsync, | 206 | .fsync = hpfs_file_fsync, |
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c index 256cd19a3b78..64989ca9ba90 100644 --- a/fs/jffs2/file.c +++ b/fs/jffs2/file.c | |||
| @@ -51,10 +51,10 @@ const struct file_operations jffs2_file_operations = | |||
| 51 | { | 51 | { |
| 52 | .llseek = generic_file_llseek, | 52 | .llseek = generic_file_llseek, |
| 53 | .open = generic_file_open, | 53 | .open = generic_file_open, |
| 54 | .read = do_sync_read, | 54 | .read = new_sync_read, |
| 55 | .aio_read = generic_file_aio_read, | 55 | .read_iter = generic_file_read_iter, |
| 56 | .write = do_sync_write, | 56 | .write = new_sync_write, |
| 57 | .aio_write = generic_file_aio_write, | 57 | .write_iter = generic_file_write_iter, |
| 58 | .unlocked_ioctl=jffs2_ioctl, | 58 | .unlocked_ioctl=jffs2_ioctl, |
| 59 | .mmap = generic_file_readonly_mmap, | 59 | .mmap = generic_file_readonly_mmap, |
| 60 | .fsync = jffs2_fsync, | 60 | .fsync = jffs2_fsync, |
diff --git a/fs/jfs/file.c b/fs/jfs/file.c index 794da944d5cd..33aa0cc1f8b8 100644 --- a/fs/jfs/file.c +++ b/fs/jfs/file.c | |||
| @@ -151,13 +151,13 @@ const struct inode_operations jfs_file_inode_operations = { | |||
| 151 | const struct file_operations jfs_file_operations = { | 151 | const struct file_operations jfs_file_operations = { |
| 152 | .open = jfs_open, | 152 | .open = jfs_open, |
| 153 | .llseek = generic_file_llseek, | 153 | .llseek = generic_file_llseek, |
| 154 | .write = do_sync_write, | 154 | .write = new_sync_write, |
| 155 | .read = do_sync_read, | 155 | .read = new_sync_read, |
| 156 | .aio_read = generic_file_aio_read, | 156 | .read_iter = generic_file_read_iter, |
| 157 | .aio_write = generic_file_aio_write, | 157 | .write_iter = generic_file_write_iter, |
| 158 | .mmap = generic_file_mmap, | 158 | .mmap = generic_file_mmap, |
| 159 | .splice_read = generic_file_splice_read, | 159 | .splice_read = generic_file_splice_read, |
| 160 | .splice_write = generic_file_splice_write, | 160 | .splice_write = iter_file_splice_write, |
| 161 | .fsync = jfs_fsync, | 161 | .fsync = jfs_fsync, |
| 162 | .release = jfs_release, | 162 | .release = jfs_release, |
| 163 | .unlocked_ioctl = jfs_ioctl, | 163 | .unlocked_ioctl = jfs_ioctl, |
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index 6f8fe72c2a7a..bd3df1ca3c9b 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c | |||
| @@ -331,15 +331,15 @@ static sector_t jfs_bmap(struct address_space *mapping, sector_t block) | |||
| 331 | } | 331 | } |
| 332 | 332 | ||
| 333 | static ssize_t jfs_direct_IO(int rw, struct kiocb *iocb, | 333 | static ssize_t jfs_direct_IO(int rw, struct kiocb *iocb, |
| 334 | const struct iovec *iov, loff_t offset, unsigned long nr_segs) | 334 | struct iov_iter *iter, loff_t offset) |
| 335 | { | 335 | { |
| 336 | struct file *file = iocb->ki_filp; | 336 | struct file *file = iocb->ki_filp; |
| 337 | struct address_space *mapping = file->f_mapping; | 337 | struct address_space *mapping = file->f_mapping; |
| 338 | struct inode *inode = file->f_mapping->host; | 338 | struct inode *inode = file->f_mapping->host; |
| 339 | size_t count = iov_iter_count(iter); | ||
| 339 | ssize_t ret; | 340 | ssize_t ret; |
| 340 | 341 | ||
| 341 | ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, | 342 | ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, jfs_get_block); |
| 342 | jfs_get_block); | ||
| 343 | 343 | ||
| 344 | /* | 344 | /* |
| 345 | * In case of error extending write may have instantiated a few | 345 | * In case of error extending write may have instantiated a few |
| @@ -347,7 +347,7 @@ static ssize_t jfs_direct_IO(int rw, struct kiocb *iocb, | |||
| 347 | */ | 347 | */ |
| 348 | if (unlikely((rw & WRITE) && ret < 0)) { | 348 | if (unlikely((rw & WRITE) && ret < 0)) { |
| 349 | loff_t isize = i_size_read(inode); | 349 | loff_t isize = i_size_read(inode); |
| 350 | loff_t end = offset + iov_length(iov, nr_segs); | 350 | loff_t end = offset + count; |
| 351 | 351 | ||
| 352 | if (end > isize) | 352 | if (end > isize) |
| 353 | jfs_write_failed(mapping, end); | 353 | jfs_write_failed(mapping, end); |
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c index e3d37f607f97..d895b4b7b661 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c | |||
| @@ -39,6 +39,19 @@ struct kernfs_open_node { | |||
| 39 | struct list_head files; /* goes through kernfs_open_file.list */ | 39 | struct list_head files; /* goes through kernfs_open_file.list */ |
| 40 | }; | 40 | }; |
| 41 | 41 | ||
| 42 | /* | ||
| 43 | * kernfs_notify() may be called from any context and bounces notifications | ||
| 44 | * through a work item. To minimize space overhead in kernfs_node, the | ||
| 45 | * pending queue is implemented as a singly linked list of kernfs_nodes. | ||
| 46 | * The list is terminated with the self pointer so that whether a | ||
| 47 | * kernfs_node is on the list or not can be determined by testing the next | ||
| 48 | * pointer for NULL. | ||
| 49 | */ | ||
| 50 | #define KERNFS_NOTIFY_EOL ((void *)&kernfs_notify_list) | ||
| 51 | |||
| 52 | static DEFINE_SPINLOCK(kernfs_notify_lock); | ||
| 53 | static struct kernfs_node *kernfs_notify_list = KERNFS_NOTIFY_EOL; | ||
| 54 | |||
| 42 | static struct kernfs_open_file *kernfs_of(struct file *file) | 55 | static struct kernfs_open_file *kernfs_of(struct file *file) |
| 43 | { | 56 | { |
| 44 | return ((struct seq_file *)file->private_data)->private; | 57 | return ((struct seq_file *)file->private_data)->private; |
| @@ -783,24 +796,25 @@ static unsigned int kernfs_fop_poll(struct file *filp, poll_table *wait) | |||
| 783 | return DEFAULT_POLLMASK|POLLERR|POLLPRI; | 796 | return DEFAULT_POLLMASK|POLLERR|POLLPRI; |
| 784 | } | 797 | } |
| 785 | 798 | ||
| 786 | /** | 799 | static void kernfs_notify_workfn(struct work_struct *work) |
| 787 | * kernfs_notify - notify a kernfs file | ||
| 788 | * @kn: file to notify | ||
| 789 | * | ||
| 790 | * Notify @kn such that poll(2) on @kn wakes up. | ||
| 791 | */ | ||
| 792 | void kernfs_notify(struct kernfs_node *kn) | ||
| 793 | { | 800 | { |
| 794 | struct kernfs_root *root = kernfs_root(kn); | 801 | struct kernfs_node *kn; |
| 795 | struct kernfs_open_node *on; | 802 | struct kernfs_open_node *on; |
| 796 | struct kernfs_super_info *info; | 803 | struct kernfs_super_info *info; |
| 797 | unsigned long flags; | 804 | repeat: |
| 798 | 805 | /* pop one off the notify_list */ | |
| 799 | if (WARN_ON(kernfs_type(kn) != KERNFS_FILE)) | 806 | spin_lock_irq(&kernfs_notify_lock); |
| 807 | kn = kernfs_notify_list; | ||
| 808 | if (kn == KERNFS_NOTIFY_EOL) { | ||
| 809 | spin_unlock_irq(&kernfs_notify_lock); | ||
| 800 | return; | 810 | return; |
| 811 | } | ||
| 812 | kernfs_notify_list = kn->attr.notify_next; | ||
| 813 | kn->attr.notify_next = NULL; | ||
| 814 | spin_unlock_irq(&kernfs_notify_lock); | ||
| 801 | 815 | ||
| 802 | /* kick poll */ | 816 | /* kick poll */ |
| 803 | spin_lock_irqsave(&kernfs_open_node_lock, flags); | 817 | spin_lock_irq(&kernfs_open_node_lock); |
| 804 | 818 | ||
| 805 | on = kn->attr.open; | 819 | on = kn->attr.open; |
| 806 | if (on) { | 820 | if (on) { |
| @@ -808,12 +822,12 @@ void kernfs_notify(struct kernfs_node *kn) | |||
| 808 | wake_up_interruptible(&on->poll); | 822 | wake_up_interruptible(&on->poll); |
| 809 | } | 823 | } |
| 810 | 824 | ||
| 811 | spin_unlock_irqrestore(&kernfs_open_node_lock, flags); | 825 | spin_unlock_irq(&kernfs_open_node_lock); |
| 812 | 826 | ||
| 813 | /* kick fsnotify */ | 827 | /* kick fsnotify */ |
| 814 | mutex_lock(&kernfs_mutex); | 828 | mutex_lock(&kernfs_mutex); |
| 815 | 829 | ||
| 816 | list_for_each_entry(info, &root->supers, node) { | 830 | list_for_each_entry(info, &kernfs_root(kn)->supers, node) { |
| 817 | struct inode *inode; | 831 | struct inode *inode; |
| 818 | struct dentry *dentry; | 832 | struct dentry *dentry; |
| 819 | 833 | ||
| @@ -833,6 +847,33 @@ void kernfs_notify(struct kernfs_node *kn) | |||
| 833 | } | 847 | } |
| 834 | 848 | ||
| 835 | mutex_unlock(&kernfs_mutex); | 849 | mutex_unlock(&kernfs_mutex); |
| 850 | kernfs_put(kn); | ||
| 851 | goto repeat; | ||
| 852 | } | ||
| 853 | |||
| 854 | /** | ||
| 855 | * kernfs_notify - notify a kernfs file | ||
| 856 | * @kn: file to notify | ||
| 857 | * | ||
| 858 | * Notify @kn such that poll(2) on @kn wakes up. May be called from any | ||
| 859 | * context. | ||
| 860 | */ | ||
| 861 | void kernfs_notify(struct kernfs_node *kn) | ||
| 862 | { | ||
| 863 | static DECLARE_WORK(kernfs_notify_work, kernfs_notify_workfn); | ||
| 864 | unsigned long flags; | ||
| 865 | |||
| 866 | if (WARN_ON(kernfs_type(kn) != KERNFS_FILE)) | ||
| 867 | return; | ||
| 868 | |||
| 869 | spin_lock_irqsave(&kernfs_notify_lock, flags); | ||
| 870 | if (!kn->attr.notify_next) { | ||
| 871 | kernfs_get(kn); | ||
| 872 | kn->attr.notify_next = kernfs_notify_list; | ||
| 873 | kernfs_notify_list = kn; | ||
| 874 | schedule_work(&kernfs_notify_work); | ||
| 875 | } | ||
| 876 | spin_unlock_irqrestore(&kernfs_notify_lock, flags); | ||
| 836 | } | 877 | } |
| 837 | EXPORT_SYMBOL_GPL(kernfs_notify); | 878 | EXPORT_SYMBOL_GPL(kernfs_notify); |
| 838 | 879 | ||
diff --git a/fs/locks.c b/fs/locks.c index da57c9b7e844..717fbc404e6b 100644 --- a/fs/locks.c +++ b/fs/locks.c | |||
| @@ -431,7 +431,7 @@ static int lease_init(struct file *filp, long type, struct file_lock *fl) | |||
| 431 | if (assign_type(fl, type) != 0) | 431 | if (assign_type(fl, type) != 0) |
| 432 | return -EINVAL; | 432 | return -EINVAL; |
| 433 | 433 | ||
| 434 | fl->fl_owner = (fl_owner_t)filp; | 434 | fl->fl_owner = (fl_owner_t)current->files; |
| 435 | fl->fl_pid = current->tgid; | 435 | fl->fl_pid = current->tgid; |
| 436 | 436 | ||
| 437 | fl->fl_file = filp; | 437 | fl->fl_file = filp; |
diff --git a/fs/logfs/file.c b/fs/logfs/file.c index 57914fc32b62..8538752df2f6 100644 --- a/fs/logfs/file.c +++ b/fs/logfs/file.c | |||
| @@ -264,15 +264,15 @@ const struct inode_operations logfs_reg_iops = { | |||
| 264 | }; | 264 | }; |
| 265 | 265 | ||
| 266 | const struct file_operations logfs_reg_fops = { | 266 | const struct file_operations logfs_reg_fops = { |
| 267 | .aio_read = generic_file_aio_read, | 267 | .read_iter = generic_file_read_iter, |
| 268 | .aio_write = generic_file_aio_write, | 268 | .write_iter = generic_file_write_iter, |
| 269 | .fsync = logfs_fsync, | 269 | .fsync = logfs_fsync, |
| 270 | .unlocked_ioctl = logfs_ioctl, | 270 | .unlocked_ioctl = logfs_ioctl, |
| 271 | .llseek = generic_file_llseek, | 271 | .llseek = generic_file_llseek, |
| 272 | .mmap = generic_file_readonly_mmap, | 272 | .mmap = generic_file_readonly_mmap, |
| 273 | .open = generic_file_open, | 273 | .open = generic_file_open, |
| 274 | .read = do_sync_read, | 274 | .read = new_sync_read, |
| 275 | .write = do_sync_write, | 275 | .write = new_sync_write, |
| 276 | }; | 276 | }; |
| 277 | 277 | ||
| 278 | const struct address_space_operations logfs_reg_aops = { | 278 | const struct address_space_operations logfs_reg_aops = { |
diff --git a/fs/mbcache.c b/fs/mbcache.c index bf166e388f0d..187477ded6b3 100644 --- a/fs/mbcache.c +++ b/fs/mbcache.c | |||
| @@ -73,6 +73,7 @@ | |||
| 73 | #include <linux/mbcache.h> | 73 | #include <linux/mbcache.h> |
| 74 | #include <linux/init.h> | 74 | #include <linux/init.h> |
| 75 | #include <linux/blockgroup_lock.h> | 75 | #include <linux/blockgroup_lock.h> |
| 76 | #include <linux/log2.h> | ||
| 76 | 77 | ||
| 77 | #ifdef MB_CACHE_DEBUG | 78 | #ifdef MB_CACHE_DEBUG |
| 78 | # define mb_debug(f...) do { \ | 79 | # define mb_debug(f...) do { \ |
| @@ -93,7 +94,7 @@ | |||
| 93 | 94 | ||
| 94 | #define MB_CACHE_WRITER ((unsigned short)~0U >> 1) | 95 | #define MB_CACHE_WRITER ((unsigned short)~0U >> 1) |
| 95 | 96 | ||
| 96 | #define MB_CACHE_ENTRY_LOCK_BITS __builtin_log2(NR_BG_LOCKS) | 97 | #define MB_CACHE_ENTRY_LOCK_BITS ilog2(NR_BG_LOCKS) |
| 97 | #define MB_CACHE_ENTRY_LOCK_INDEX(ce) \ | 98 | #define MB_CACHE_ENTRY_LOCK_INDEX(ce) \ |
| 98 | (hash_long((unsigned long)ce, MB_CACHE_ENTRY_LOCK_BITS)) | 99 | (hash_long((unsigned long)ce, MB_CACHE_ENTRY_LOCK_BITS)) |
| 99 | 100 | ||
diff --git a/fs/minix/file.c b/fs/minix/file.c index adc6f5494231..a967de085ac0 100644 --- a/fs/minix/file.c +++ b/fs/minix/file.c | |||
| @@ -14,10 +14,10 @@ | |||
| 14 | */ | 14 | */ |
| 15 | const struct file_operations minix_file_operations = { | 15 | const struct file_operations minix_file_operations = { |
| 16 | .llseek = generic_file_llseek, | 16 | .llseek = generic_file_llseek, |
| 17 | .read = do_sync_read, | 17 | .read = new_sync_read, |
| 18 | .aio_read = generic_file_aio_read, | 18 | .read_iter = generic_file_read_iter, |
| 19 | .write = do_sync_write, | 19 | .write = new_sync_write, |
| 20 | .aio_write = generic_file_aio_write, | 20 | .write_iter = generic_file_write_iter, |
| 21 | .mmap = generic_file_mmap, | 21 | .mmap = generic_file_mmap, |
| 22 | .fsync = generic_file_fsync, | 22 | .fsync = generic_file_fsync, |
| 23 | .splice_read = generic_file_splice_read, | 23 | .splice_read = generic_file_splice_read, |
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 4ad7bc388679..8f98138cbc43 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c | |||
| @@ -212,20 +212,20 @@ static int nfs_direct_cmp_commit_data_verf(struct nfs_direct_req *dreq, | |||
| 212 | * shunt off direct read and write requests before the VFS gets them, | 212 | * shunt off direct read and write requests before the VFS gets them, |
| 213 | * so this method is only ever called for swap. | 213 | * so this method is only ever called for swap. |
| 214 | */ | 214 | */ |
| 215 | ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t pos, unsigned long nr_segs) | 215 | ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t pos) |
| 216 | { | 216 | { |
| 217 | #ifndef CONFIG_NFS_SWAP | 217 | #ifndef CONFIG_NFS_SWAP |
| 218 | dprintk("NFS: nfs_direct_IO (%pD) off/no(%Ld/%lu) EINVAL\n", | 218 | dprintk("NFS: nfs_direct_IO (%pD) off/no(%Ld/%lu) EINVAL\n", |
| 219 | iocb->ki_filp, (long long) pos, nr_segs); | 219 | iocb->ki_filp, (long long) pos, iter->nr_segs); |
| 220 | 220 | ||
| 221 | return -EINVAL; | 221 | return -EINVAL; |
| 222 | #else | 222 | #else |
| 223 | VM_BUG_ON(iocb->ki_nbytes != PAGE_SIZE); | 223 | VM_BUG_ON(iocb->ki_nbytes != PAGE_SIZE); |
| 224 | 224 | ||
| 225 | if (rw == READ || rw == KERNEL_READ) | 225 | if (rw == READ || rw == KERNEL_READ) |
| 226 | return nfs_file_direct_read(iocb, iov, nr_segs, pos, | 226 | return nfs_file_direct_read(iocb, iter, pos, |
| 227 | rw == READ ? true : false); | 227 | rw == READ ? true : false); |
| 228 | return nfs_file_direct_write(iocb, iov, nr_segs, pos, | 228 | return nfs_file_direct_write(iocb, iter, pos, |
| 229 | rw == WRITE ? true : false); | 229 | rw == WRITE ? true : false); |
| 230 | #endif /* CONFIG_NFS_SWAP */ | 230 | #endif /* CONFIG_NFS_SWAP */ |
| 231 | } | 231 | } |
| @@ -414,60 +414,37 @@ static const struct nfs_pgio_completion_ops nfs_direct_read_completion_ops = { | |||
| 414 | * handled automatically by nfs_direct_read_result(). Otherwise, if | 414 | * handled automatically by nfs_direct_read_result(). Otherwise, if |
| 415 | * no requests have been sent, just return an error. | 415 | * no requests have been sent, just return an error. |
| 416 | */ | 416 | */ |
| 417 | static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *desc, | ||
| 418 | const struct iovec *iov, | ||
| 419 | loff_t pos, bool uio) | ||
| 420 | { | ||
| 421 | struct nfs_direct_req *dreq = desc->pg_dreq; | ||
| 422 | struct nfs_open_context *ctx = dreq->ctx; | ||
| 423 | struct inode *inode = ctx->dentry->d_inode; | ||
| 424 | unsigned long user_addr = (unsigned long)iov->iov_base; | ||
| 425 | size_t count = iov->iov_len; | ||
| 426 | size_t rsize = NFS_SERVER(inode)->rsize; | ||
| 427 | unsigned int pgbase; | ||
| 428 | int result; | ||
| 429 | ssize_t started = 0; | ||
| 430 | struct page **pagevec = NULL; | ||
| 431 | unsigned int npages; | ||
| 432 | |||
| 433 | do { | ||
| 434 | size_t bytes; | ||
| 435 | int i; | ||
| 436 | 417 | ||
| 437 | pgbase = user_addr & ~PAGE_MASK; | 418 | static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, |
| 438 | bytes = min(max_t(size_t, rsize, PAGE_SIZE), count); | 419 | struct iov_iter *iter, |
| 420 | loff_t pos) | ||
| 421 | { | ||
| 422 | struct nfs_pageio_descriptor desc; | ||
| 423 | struct inode *inode = dreq->inode; | ||
| 424 | ssize_t result = -EINVAL; | ||
| 425 | size_t requested_bytes = 0; | ||
| 426 | size_t rsize = max_t(size_t, NFS_SERVER(inode)->rsize, PAGE_SIZE); | ||
| 439 | 427 | ||
| 440 | result = -ENOMEM; | 428 | nfs_pageio_init_read(&desc, dreq->inode, false, |
| 441 | npages = nfs_page_array_len(pgbase, bytes); | 429 | &nfs_direct_read_completion_ops); |
| 442 | if (!pagevec) | 430 | get_dreq(dreq); |
| 443 | pagevec = kmalloc(npages * sizeof(struct page *), | 431 | desc.pg_dreq = dreq; |
| 444 | GFP_KERNEL); | 432 | atomic_inc(&inode->i_dio_count); |
| 445 | if (!pagevec) | ||
| 446 | break; | ||
| 447 | if (uio) { | ||
| 448 | down_read(¤t->mm->mmap_sem); | ||
| 449 | result = get_user_pages(current, current->mm, user_addr, | ||
| 450 | npages, 1, 0, pagevec, NULL); | ||
| 451 | up_read(¤t->mm->mmap_sem); | ||
| 452 | if (result < 0) | ||
| 453 | break; | ||
| 454 | } else { | ||
| 455 | WARN_ON(npages != 1); | ||
| 456 | result = get_kernel_page(user_addr, 1, pagevec); | ||
| 457 | if (WARN_ON(result != 1)) | ||
| 458 | break; | ||
| 459 | } | ||
| 460 | 433 | ||
| 461 | if ((unsigned)result < npages) { | 434 | while (iov_iter_count(iter)) { |
| 462 | bytes = result * PAGE_SIZE; | 435 | struct page **pagevec; |
| 463 | if (bytes <= pgbase) { | 436 | size_t bytes; |
| 464 | nfs_direct_release_pages(pagevec, result); | 437 | size_t pgbase; |
| 465 | break; | 438 | unsigned npages, i; |
| 466 | } | ||
| 467 | bytes -= pgbase; | ||
| 468 | npages = result; | ||
| 469 | } | ||
| 470 | 439 | ||
| 440 | result = iov_iter_get_pages_alloc(iter, &pagevec, | ||
| 441 | rsize, &pgbase); | ||
| 442 | if (result < 0) | ||
| 443 | break; | ||
| 444 | |||
| 445 | bytes = result; | ||
| 446 | iov_iter_advance(iter, bytes); | ||
| 447 | npages = (result + pgbase + PAGE_SIZE - 1) / PAGE_SIZE; | ||
| 471 | for (i = 0; i < npages; i++) { | 448 | for (i = 0; i < npages; i++) { |
| 472 | struct nfs_page *req; | 449 | struct nfs_page *req; |
| 473 | unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase); | 450 | unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase); |
| @@ -480,56 +457,21 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de | |||
| 480 | } | 457 | } |
| 481 | req->wb_index = pos >> PAGE_SHIFT; | 458 | req->wb_index = pos >> PAGE_SHIFT; |
| 482 | req->wb_offset = pos & ~PAGE_MASK; | 459 | req->wb_offset = pos & ~PAGE_MASK; |
| 483 | if (!nfs_pageio_add_request(desc, req)) { | 460 | if (!nfs_pageio_add_request(&desc, req)) { |
| 484 | result = desc->pg_error; | 461 | result = desc.pg_error; |
| 485 | nfs_release_request(req); | 462 | nfs_release_request(req); |
| 486 | break; | 463 | break; |
| 487 | } | 464 | } |
| 488 | pgbase = 0; | 465 | pgbase = 0; |
| 489 | bytes -= req_len; | 466 | bytes -= req_len; |
| 490 | started += req_len; | 467 | requested_bytes += req_len; |
| 491 | user_addr += req_len; | ||
| 492 | pos += req_len; | 468 | pos += req_len; |
| 493 | count -= req_len; | ||
| 494 | dreq->bytes_left -= req_len; | 469 | dreq->bytes_left -= req_len; |
| 495 | } | 470 | } |
| 496 | /* The nfs_page now hold references to these pages */ | ||
| 497 | nfs_direct_release_pages(pagevec, npages); | 471 | nfs_direct_release_pages(pagevec, npages); |
| 498 | } while (count != 0 && result >= 0); | 472 | kvfree(pagevec); |
| 499 | |||
| 500 | kfree(pagevec); | ||
| 501 | |||
| 502 | if (started) | ||
| 503 | return started; | ||
| 504 | return result < 0 ? (ssize_t) result : -EFAULT; | ||
| 505 | } | ||
| 506 | |||
| 507 | static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, | ||
| 508 | const struct iovec *iov, | ||
| 509 | unsigned long nr_segs, | ||
| 510 | loff_t pos, bool uio) | ||
| 511 | { | ||
| 512 | struct nfs_pageio_descriptor desc; | ||
| 513 | struct inode *inode = dreq->inode; | ||
| 514 | ssize_t result = -EINVAL; | ||
| 515 | size_t requested_bytes = 0; | ||
| 516 | unsigned long seg; | ||
| 517 | |||
| 518 | nfs_pageio_init_read(&desc, dreq->inode, false, | ||
| 519 | &nfs_direct_read_completion_ops); | ||
| 520 | get_dreq(dreq); | ||
| 521 | desc.pg_dreq = dreq; | ||
| 522 | atomic_inc(&inode->i_dio_count); | ||
| 523 | |||
| 524 | for (seg = 0; seg < nr_segs; seg++) { | ||
| 525 | const struct iovec *vec = &iov[seg]; | ||
| 526 | result = nfs_direct_read_schedule_segment(&desc, vec, pos, uio); | ||
| 527 | if (result < 0) | 473 | if (result < 0) |
| 528 | break; | 474 | break; |
| 529 | requested_bytes += result; | ||
| 530 | if ((size_t)result < vec->iov_len) | ||
| 531 | break; | ||
| 532 | pos += vec->iov_len; | ||
| 533 | } | 475 | } |
| 534 | 476 | ||
| 535 | nfs_pageio_complete(&desc); | 477 | nfs_pageio_complete(&desc); |
| @@ -552,8 +494,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, | |||
| 552 | /** | 494 | /** |
| 553 | * nfs_file_direct_read - file direct read operation for NFS files | 495 | * nfs_file_direct_read - file direct read operation for NFS files |
| 554 | * @iocb: target I/O control block | 496 | * @iocb: target I/O control block |
| 555 | * @iov: vector of user buffers into which to read data | 497 | * @iter: vector of user buffers into which to read data |
| 556 | * @nr_segs: size of iov vector | ||
| 557 | * @pos: byte offset in file where reading starts | 498 | * @pos: byte offset in file where reading starts |
| 558 | * | 499 | * |
| 559 | * We use this function for direct reads instead of calling | 500 | * We use this function for direct reads instead of calling |
| @@ -570,8 +511,8 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, | |||
| 570 | * client must read the updated atime from the server back into its | 511 | * client must read the updated atime from the server back into its |
| 571 | * cache. | 512 | * cache. |
| 572 | */ | 513 | */ |
| 573 | ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov, | 514 | ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter, |
| 574 | unsigned long nr_segs, loff_t pos, bool uio) | 515 | loff_t pos, bool uio) |
| 575 | { | 516 | { |
| 576 | struct file *file = iocb->ki_filp; | 517 | struct file *file = iocb->ki_filp; |
| 577 | struct address_space *mapping = file->f_mapping; | 518 | struct address_space *mapping = file->f_mapping; |
| @@ -579,9 +520,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov, | |||
| 579 | struct nfs_direct_req *dreq; | 520 | struct nfs_direct_req *dreq; |
| 580 | struct nfs_lock_context *l_ctx; | 521 | struct nfs_lock_context *l_ctx; |
| 581 | ssize_t result = -EINVAL; | 522 | ssize_t result = -EINVAL; |
| 582 | size_t count; | 523 | size_t count = iov_iter_count(iter); |
| 583 | |||
| 584 | count = iov_length(iov, nr_segs); | ||
| 585 | nfs_add_stats(mapping->host, NFSIOS_DIRECTREADBYTES, count); | 524 | nfs_add_stats(mapping->host, NFSIOS_DIRECTREADBYTES, count); |
| 586 | 525 | ||
| 587 | dfprintk(FILE, "NFS: direct read(%pD2, %zd@%Ld)\n", | 526 | dfprintk(FILE, "NFS: direct read(%pD2, %zd@%Ld)\n", |
| @@ -604,7 +543,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov, | |||
| 604 | goto out_unlock; | 543 | goto out_unlock; |
| 605 | 544 | ||
| 606 | dreq->inode = inode; | 545 | dreq->inode = inode; |
| 607 | dreq->bytes_left = iov_length(iov, nr_segs); | 546 | dreq->bytes_left = count; |
| 608 | dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); | 547 | dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); |
| 609 | l_ctx = nfs_get_lock_context(dreq->ctx); | 548 | l_ctx = nfs_get_lock_context(dreq->ctx); |
| 610 | if (IS_ERR(l_ctx)) { | 549 | if (IS_ERR(l_ctx)) { |
| @@ -615,8 +554,8 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov, | |||
| 615 | if (!is_sync_kiocb(iocb)) | 554 | if (!is_sync_kiocb(iocb)) |
| 616 | dreq->iocb = iocb; | 555 | dreq->iocb = iocb; |
| 617 | 556 | ||
| 618 | NFS_I(inode)->read_io += iov_length(iov, nr_segs); | 557 | NFS_I(inode)->read_io += count; |
| 619 | result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos, uio); | 558 | result = nfs_direct_read_schedule_iovec(dreq, iter, pos); |
| 620 | 559 | ||
| 621 | mutex_unlock(&inode->i_mutex); | 560 | mutex_unlock(&inode->i_mutex); |
| 622 | 561 | ||
| @@ -772,108 +711,6 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode | |||
| 772 | } | 711 | } |
| 773 | #endif | 712 | #endif |
| 774 | 713 | ||
| 775 | /* | ||
| 776 | * NB: Return the value of the first error return code. Subsequent | ||
| 777 | * errors after the first one are ignored. | ||
| 778 | */ | ||
| 779 | /* | ||
| 780 | * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE | ||
| 781 | * operation. If nfs_writedata_alloc() or get_user_pages() fails, | ||
| 782 | * bail and stop sending more writes. Write length accounting is | ||
| 783 | * handled automatically by nfs_direct_write_result(). Otherwise, if | ||
| 784 | * no requests have been sent, just return an error. | ||
| 785 | */ | ||
| 786 | static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *desc, | ||
| 787 | const struct iovec *iov, | ||
| 788 | loff_t pos, bool uio) | ||
| 789 | { | ||
| 790 | struct nfs_direct_req *dreq = desc->pg_dreq; | ||
| 791 | struct nfs_open_context *ctx = dreq->ctx; | ||
| 792 | struct inode *inode = ctx->dentry->d_inode; | ||
| 793 | unsigned long user_addr = (unsigned long)iov->iov_base; | ||
| 794 | size_t count = iov->iov_len; | ||
| 795 | size_t wsize = NFS_SERVER(inode)->wsize; | ||
| 796 | unsigned int pgbase; | ||
| 797 | int result; | ||
| 798 | ssize_t started = 0; | ||
| 799 | struct page **pagevec = NULL; | ||
| 800 | unsigned int npages; | ||
| 801 | |||
| 802 | do { | ||
| 803 | size_t bytes; | ||
| 804 | int i; | ||
| 805 | |||
| 806 | pgbase = user_addr & ~PAGE_MASK; | ||
| 807 | bytes = min(max_t(size_t, wsize, PAGE_SIZE), count); | ||
| 808 | |||
| 809 | result = -ENOMEM; | ||
| 810 | npages = nfs_page_array_len(pgbase, bytes); | ||
| 811 | if (!pagevec) | ||
| 812 | pagevec = kmalloc(npages * sizeof(struct page *), GFP_KERNEL); | ||
| 813 | if (!pagevec) | ||
| 814 | break; | ||
| 815 | |||
| 816 | if (uio) { | ||
| 817 | down_read(¤t->mm->mmap_sem); | ||
| 818 | result = get_user_pages(current, current->mm, user_addr, | ||
| 819 | npages, 0, 0, pagevec, NULL); | ||
| 820 | up_read(¤t->mm->mmap_sem); | ||
| 821 | if (result < 0) | ||
| 822 | break; | ||
| 823 | } else { | ||
| 824 | WARN_ON(npages != 1); | ||
| 825 | result = get_kernel_page(user_addr, 0, pagevec); | ||
| 826 | if (WARN_ON(result != 1)) | ||
| 827 | break; | ||
| 828 | } | ||
| 829 | |||
| 830 | if ((unsigned)result < npages) { | ||
| 831 | bytes = result * PAGE_SIZE; | ||
| 832 | if (bytes <= pgbase) { | ||
| 833 | nfs_direct_release_pages(pagevec, result); | ||
| 834 | break; | ||
| 835 | } | ||
| 836 | bytes -= pgbase; | ||
| 837 | npages = result; | ||
| 838 | } | ||
| 839 | |||
| 840 | for (i = 0; i < npages; i++) { | ||
| 841 | struct nfs_page *req; | ||
| 842 | unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase); | ||
| 843 | |||
| 844 | req = nfs_create_request(dreq->ctx, pagevec[i], NULL, | ||
| 845 | pgbase, req_len); | ||
| 846 | if (IS_ERR(req)) { | ||
| 847 | result = PTR_ERR(req); | ||
| 848 | break; | ||
| 849 | } | ||
| 850 | nfs_lock_request(req); | ||
| 851 | req->wb_index = pos >> PAGE_SHIFT; | ||
| 852 | req->wb_offset = pos & ~PAGE_MASK; | ||
| 853 | if (!nfs_pageio_add_request(desc, req)) { | ||
| 854 | result = desc->pg_error; | ||
| 855 | nfs_unlock_and_release_request(req); | ||
| 856 | break; | ||
| 857 | } | ||
| 858 | pgbase = 0; | ||
| 859 | bytes -= req_len; | ||
| 860 | started += req_len; | ||
| 861 | user_addr += req_len; | ||
| 862 | pos += req_len; | ||
| 863 | count -= req_len; | ||
| 864 | dreq->bytes_left -= req_len; | ||
| 865 | } | ||
| 866 | /* The nfs_page now hold references to these pages */ | ||
| 867 | nfs_direct_release_pages(pagevec, npages); | ||
| 868 | } while (count != 0 && result >= 0); | ||
| 869 | |||
| 870 | kfree(pagevec); | ||
| 871 | |||
| 872 | if (started) | ||
| 873 | return started; | ||
| 874 | return result < 0 ? (ssize_t) result : -EFAULT; | ||
| 875 | } | ||
| 876 | |||
| 877 | static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) | 714 | static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) |
| 878 | { | 715 | { |
| 879 | struct nfs_direct_req *dreq = hdr->dreq; | 716 | struct nfs_direct_req *dreq = hdr->dreq; |
| @@ -956,16 +793,27 @@ static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = { | |||
| 956 | .completion = nfs_direct_write_completion, | 793 | .completion = nfs_direct_write_completion, |
| 957 | }; | 794 | }; |
| 958 | 795 | ||
| 796 | |||
| 797 | /* | ||
| 798 | * NB: Return the value of the first error return code. Subsequent | ||
| 799 | * errors after the first one are ignored. | ||
| 800 | */ | ||
| 801 | /* | ||
| 802 | * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE | ||
| 803 | * operation. If nfs_writedata_alloc() or get_user_pages() fails, | ||
| 804 | * bail and stop sending more writes. Write length accounting is | ||
| 805 | * handled automatically by nfs_direct_write_result(). Otherwise, if | ||
| 806 | * no requests have been sent, just return an error. | ||
| 807 | */ | ||
| 959 | static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, | 808 | static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, |
| 960 | const struct iovec *iov, | 809 | struct iov_iter *iter, |
| 961 | unsigned long nr_segs, | 810 | loff_t pos) |
| 962 | loff_t pos, bool uio) | ||
| 963 | { | 811 | { |
| 964 | struct nfs_pageio_descriptor desc; | 812 | struct nfs_pageio_descriptor desc; |
| 965 | struct inode *inode = dreq->inode; | 813 | struct inode *inode = dreq->inode; |
| 966 | ssize_t result = 0; | 814 | ssize_t result = 0; |
| 967 | size_t requested_bytes = 0; | 815 | size_t requested_bytes = 0; |
| 968 | unsigned long seg; | 816 | size_t wsize = max_t(size_t, NFS_SERVER(inode)->wsize, PAGE_SIZE); |
| 969 | 817 | ||
| 970 | nfs_pageio_init_write(&desc, inode, FLUSH_COND_STABLE, false, | 818 | nfs_pageio_init_write(&desc, inode, FLUSH_COND_STABLE, false, |
| 971 | &nfs_direct_write_completion_ops); | 819 | &nfs_direct_write_completion_ops); |
| @@ -973,16 +821,49 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, | |||
| 973 | get_dreq(dreq); | 821 | get_dreq(dreq); |
| 974 | atomic_inc(&inode->i_dio_count); | 822 | atomic_inc(&inode->i_dio_count); |
| 975 | 823 | ||
| 976 | NFS_I(dreq->inode)->write_io += iov_length(iov, nr_segs); | 824 | NFS_I(inode)->write_io += iov_iter_count(iter); |
| 977 | for (seg = 0; seg < nr_segs; seg++) { | 825 | while (iov_iter_count(iter)) { |
| 978 | const struct iovec *vec = &iov[seg]; | 826 | struct page **pagevec; |
| 979 | result = nfs_direct_write_schedule_segment(&desc, vec, pos, uio); | 827 | size_t bytes; |
| 828 | size_t pgbase; | ||
| 829 | unsigned npages, i; | ||
| 830 | |||
| 831 | result = iov_iter_get_pages_alloc(iter, &pagevec, | ||
| 832 | wsize, &pgbase); | ||
| 980 | if (result < 0) | 833 | if (result < 0) |
| 981 | break; | 834 | break; |
| 982 | requested_bytes += result; | 835 | |
| 983 | if ((size_t)result < vec->iov_len) | 836 | bytes = result; |
| 837 | iov_iter_advance(iter, bytes); | ||
| 838 | npages = (result + pgbase + PAGE_SIZE - 1) / PAGE_SIZE; | ||
| 839 | for (i = 0; i < npages; i++) { | ||
| 840 | struct nfs_page *req; | ||
| 841 | unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase); | ||
| 842 | |||
| 843 | req = nfs_create_request(dreq->ctx, pagevec[i], NULL, | ||
| 844 | pgbase, req_len); | ||
| 845 | if (IS_ERR(req)) { | ||
| 846 | result = PTR_ERR(req); | ||
| 847 | break; | ||
| 848 | } | ||
| 849 | nfs_lock_request(req); | ||
| 850 | req->wb_index = pos >> PAGE_SHIFT; | ||
| 851 | req->wb_offset = pos & ~PAGE_MASK; | ||
| 852 | if (!nfs_pageio_add_request(&desc, req)) { | ||
| 853 | result = desc.pg_error; | ||
| 854 | nfs_unlock_and_release_request(req); | ||
| 855 | break; | ||
| 856 | } | ||
| 857 | pgbase = 0; | ||
| 858 | bytes -= req_len; | ||
| 859 | requested_bytes += req_len; | ||
| 860 | pos += req_len; | ||
| 861 | dreq->bytes_left -= req_len; | ||
| 862 | } | ||
| 863 | nfs_direct_release_pages(pagevec, npages); | ||
| 864 | kvfree(pagevec); | ||
| 865 | if (result < 0) | ||
| 984 | break; | 866 | break; |
| 985 | pos += vec->iov_len; | ||
| 986 | } | 867 | } |
| 987 | nfs_pageio_complete(&desc); | 868 | nfs_pageio_complete(&desc); |
| 988 | 869 | ||
| @@ -1004,8 +885,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, | |||
| 1004 | /** | 885 | /** |
| 1005 | * nfs_file_direct_write - file direct write operation for NFS files | 886 | * nfs_file_direct_write - file direct write operation for NFS files |
| 1006 | * @iocb: target I/O control block | 887 | * @iocb: target I/O control block |
| 1007 | * @iov: vector of user buffers from which to write data | 888 | * @iter: vector of user buffers from which to write data |
| 1008 | * @nr_segs: size of iov vector | ||
| 1009 | * @pos: byte offset in file where writing starts | 889 | * @pos: byte offset in file where writing starts |
| 1010 | * | 890 | * |
| 1011 | * We use this function for direct writes instead of calling | 891 | * We use this function for direct writes instead of calling |
| @@ -1023,8 +903,8 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, | |||
| 1023 | * Note that O_APPEND is not supported for NFS direct writes, as there | 903 | * Note that O_APPEND is not supported for NFS direct writes, as there |
| 1024 | * is no atomic O_APPEND write facility in the NFS protocol. | 904 | * is no atomic O_APPEND write facility in the NFS protocol. |
| 1025 | */ | 905 | */ |
| 1026 | ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov, | 906 | ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter, |
| 1027 | unsigned long nr_segs, loff_t pos, bool uio) | 907 | loff_t pos, bool uio) |
| 1028 | { | 908 | { |
| 1029 | ssize_t result = -EINVAL; | 909 | ssize_t result = -EINVAL; |
| 1030 | struct file *file = iocb->ki_filp; | 910 | struct file *file = iocb->ki_filp; |
| @@ -1033,9 +913,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov, | |||
| 1033 | struct nfs_direct_req *dreq; | 913 | struct nfs_direct_req *dreq; |
| 1034 | struct nfs_lock_context *l_ctx; | 914 | struct nfs_lock_context *l_ctx; |
| 1035 | loff_t end; | 915 | loff_t end; |
| 1036 | size_t count; | 916 | size_t count = iov_iter_count(iter); |
| 1037 | |||
| 1038 | count = iov_length(iov, nr_segs); | ||
| 1039 | end = (pos + count - 1) >> PAGE_CACHE_SHIFT; | 917 | end = (pos + count - 1) >> PAGE_CACHE_SHIFT; |
| 1040 | 918 | ||
| 1041 | nfs_add_stats(mapping->host, NFSIOS_DIRECTWRITTENBYTES, count); | 919 | nfs_add_stats(mapping->host, NFSIOS_DIRECTWRITTENBYTES, count); |
| @@ -1086,7 +964,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov, | |||
| 1086 | if (!is_sync_kiocb(iocb)) | 964 | if (!is_sync_kiocb(iocb)) |
| 1087 | dreq->iocb = iocb; | 965 | dreq->iocb = iocb; |
| 1088 | 966 | ||
| 1089 | result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos, uio); | 967 | result = nfs_direct_write_schedule_iovec(dreq, iter, pos); |
| 1090 | 968 | ||
| 1091 | if (mapping->nrpages) { | 969 | if (mapping->nrpages) { |
| 1092 | invalidate_inode_pages2_range(mapping, | 970 | invalidate_inode_pages2_range(mapping, |
diff --git a/fs/nfs/file.c b/fs/nfs/file.c index c1edf7336315..4042ff58fe3f 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c | |||
| @@ -165,22 +165,21 @@ nfs_file_flush(struct file *file, fl_owner_t id) | |||
| 165 | EXPORT_SYMBOL_GPL(nfs_file_flush); | 165 | EXPORT_SYMBOL_GPL(nfs_file_flush); |
| 166 | 166 | ||
| 167 | ssize_t | 167 | ssize_t |
| 168 | nfs_file_read(struct kiocb *iocb, const struct iovec *iov, | 168 | nfs_file_read(struct kiocb *iocb, struct iov_iter *to) |
| 169 | unsigned long nr_segs, loff_t pos) | ||
| 170 | { | 169 | { |
| 171 | struct inode *inode = file_inode(iocb->ki_filp); | 170 | struct inode *inode = file_inode(iocb->ki_filp); |
| 172 | ssize_t result; | 171 | ssize_t result; |
| 173 | 172 | ||
| 174 | if (iocb->ki_filp->f_flags & O_DIRECT) | 173 | if (iocb->ki_filp->f_flags & O_DIRECT) |
| 175 | return nfs_file_direct_read(iocb, iov, nr_segs, pos, true); | 174 | return nfs_file_direct_read(iocb, to, iocb->ki_pos, true); |
| 176 | 175 | ||
| 177 | dprintk("NFS: read(%pD2, %lu@%lu)\n", | 176 | dprintk("NFS: read(%pD2, %zu@%lu)\n", |
| 178 | iocb->ki_filp, | 177 | iocb->ki_filp, |
| 179 | (unsigned long) iov_length(iov, nr_segs), (unsigned long) pos); | 178 | iov_iter_count(to), (unsigned long) iocb->ki_pos); |
| 180 | 179 | ||
| 181 | result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping); | 180 | result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping); |
| 182 | if (!result) { | 181 | if (!result) { |
| 183 | result = generic_file_aio_read(iocb, iov, nr_segs, pos); | 182 | result = generic_file_read_iter(iocb, to); |
| 184 | if (result > 0) | 183 | if (result > 0) |
| 185 | nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, result); | 184 | nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, result); |
| 186 | } | 185 | } |
| @@ -635,24 +634,24 @@ static int nfs_need_sync_write(struct file *filp, struct inode *inode) | |||
| 635 | return 0; | 634 | return 0; |
| 636 | } | 635 | } |
| 637 | 636 | ||
| 638 | ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, | 637 | ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from) |
| 639 | unsigned long nr_segs, loff_t pos) | ||
| 640 | { | 638 | { |
| 641 | struct file *file = iocb->ki_filp; | 639 | struct file *file = iocb->ki_filp; |
| 642 | struct inode *inode = file_inode(file); | 640 | struct inode *inode = file_inode(file); |
| 643 | unsigned long written = 0; | 641 | unsigned long written = 0; |
| 644 | ssize_t result; | 642 | ssize_t result; |
| 645 | size_t count = iov_length(iov, nr_segs); | 643 | size_t count = iov_iter_count(from); |
| 644 | loff_t pos = iocb->ki_pos; | ||
| 646 | 645 | ||
| 647 | result = nfs_key_timeout_notify(file, inode); | 646 | result = nfs_key_timeout_notify(file, inode); |
| 648 | if (result) | 647 | if (result) |
| 649 | return result; | 648 | return result; |
| 650 | 649 | ||
| 651 | if (file->f_flags & O_DIRECT) | 650 | if (file->f_flags & O_DIRECT) |
| 652 | return nfs_file_direct_write(iocb, iov, nr_segs, pos, true); | 651 | return nfs_file_direct_write(iocb, from, pos, true); |
| 653 | 652 | ||
| 654 | dprintk("NFS: write(%pD2, %lu@%Ld)\n", | 653 | dprintk("NFS: write(%pD2, %zu@%Ld)\n", |
| 655 | file, (unsigned long) count, (long long) pos); | 654 | file, count, (long long) pos); |
| 656 | 655 | ||
| 657 | result = -EBUSY; | 656 | result = -EBUSY; |
| 658 | if (IS_SWAPFILE(inode)) | 657 | if (IS_SWAPFILE(inode)) |
| @@ -670,7 +669,7 @@ ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, | |||
| 670 | if (!count) | 669 | if (!count) |
| 671 | goto out; | 670 | goto out; |
| 672 | 671 | ||
| 673 | result = generic_file_aio_write(iocb, iov, nr_segs, pos); | 672 | result = generic_file_write_iter(iocb, from); |
| 674 | if (result > 0) | 673 | if (result > 0) |
| 675 | written = result; | 674 | written = result; |
| 676 | 675 | ||
| @@ -691,36 +690,6 @@ out_swapfile: | |||
| 691 | } | 690 | } |
| 692 | EXPORT_SYMBOL_GPL(nfs_file_write); | 691 | EXPORT_SYMBOL_GPL(nfs_file_write); |
| 693 | 692 | ||
| 694 | ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe, | ||
| 695 | struct file *filp, loff_t *ppos, | ||
| 696 | size_t count, unsigned int flags) | ||
| 697 | { | ||
| 698 | struct inode *inode = file_inode(filp); | ||
| 699 | unsigned long written = 0; | ||
| 700 | ssize_t ret; | ||
| 701 | |||
| 702 | dprintk("NFS splice_write(%pD2, %lu@%llu)\n", | ||
| 703 | filp, (unsigned long) count, (unsigned long long) *ppos); | ||
| 704 | |||
| 705 | /* | ||
| 706 | * The combination of splice and an O_APPEND destination is disallowed. | ||
| 707 | */ | ||
| 708 | |||
| 709 | ret = generic_file_splice_write(pipe, filp, ppos, count, flags); | ||
| 710 | if (ret > 0) | ||
| 711 | written = ret; | ||
| 712 | |||
| 713 | if (ret >= 0 && nfs_need_sync_write(filp, inode)) { | ||
| 714 | int err = vfs_fsync(filp, 0); | ||
| 715 | if (err < 0) | ||
| 716 | ret = err; | ||
| 717 | } | ||
| 718 | if (ret > 0) | ||
| 719 | nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written); | ||
| 720 | return ret; | ||
| 721 | } | ||
| 722 | EXPORT_SYMBOL_GPL(nfs_file_splice_write); | ||
| 723 | |||
| 724 | static int | 693 | static int |
| 725 | do_getlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) | 694 | do_getlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) |
| 726 | { | 695 | { |
| @@ -935,10 +904,10 @@ EXPORT_SYMBOL_GPL(nfs_setlease); | |||
| 935 | 904 | ||
| 936 | const struct file_operations nfs_file_operations = { | 905 | const struct file_operations nfs_file_operations = { |
| 937 | .llseek = nfs_file_llseek, | 906 | .llseek = nfs_file_llseek, |
| 938 | .read = do_sync_read, | 907 | .read = new_sync_read, |
| 939 | .write = do_sync_write, | 908 | .write = new_sync_write, |
| 940 | .aio_read = nfs_file_read, | 909 | .read_iter = nfs_file_read, |
| 941 | .aio_write = nfs_file_write, | 910 | .write_iter = nfs_file_write, |
| 942 | .mmap = nfs_file_mmap, | 911 | .mmap = nfs_file_mmap, |
| 943 | .open = nfs_file_open, | 912 | .open = nfs_file_open, |
| 944 | .flush = nfs_file_flush, | 913 | .flush = nfs_file_flush, |
| @@ -947,7 +916,7 @@ const struct file_operations nfs_file_operations = { | |||
| 947 | .lock = nfs_lock, | 916 | .lock = nfs_lock, |
| 948 | .flock = nfs_flock, | 917 | .flock = nfs_flock, |
| 949 | .splice_read = nfs_file_splice_read, | 918 | .splice_read = nfs_file_splice_read, |
| 950 | .splice_write = nfs_file_splice_write, | 919 | .splice_write = iter_file_splice_write, |
| 951 | .check_flags = nfs_check_flags, | 920 | .check_flags = nfs_check_flags, |
| 952 | .setlease = nfs_setlease, | 921 | .setlease = nfs_setlease, |
| 953 | }; | 922 | }; |
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index c496f8a74639..9927913c97c2 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c | |||
| @@ -147,6 +147,17 @@ int nfs_sync_mapping(struct address_space *mapping) | |||
| 147 | return ret; | 147 | return ret; |
| 148 | } | 148 | } |
| 149 | 149 | ||
| 150 | static void nfs_set_cache_invalid(struct inode *inode, unsigned long flags) | ||
| 151 | { | ||
| 152 | struct nfs_inode *nfsi = NFS_I(inode); | ||
| 153 | |||
| 154 | if (inode->i_mapping->nrpages == 0) | ||
| 155 | flags &= ~NFS_INO_INVALID_DATA; | ||
| 156 | nfsi->cache_validity |= flags; | ||
| 157 | if (flags & NFS_INO_INVALID_DATA) | ||
| 158 | nfs_fscache_invalidate(inode); | ||
| 159 | } | ||
| 160 | |||
| 150 | /* | 161 | /* |
| 151 | * Invalidate the local caches | 162 | * Invalidate the local caches |
| 152 | */ | 163 | */ |
| @@ -162,17 +173,16 @@ static void nfs_zap_caches_locked(struct inode *inode) | |||
| 162 | 173 | ||
| 163 | memset(NFS_I(inode)->cookieverf, 0, sizeof(NFS_I(inode)->cookieverf)); | 174 | memset(NFS_I(inode)->cookieverf, 0, sizeof(NFS_I(inode)->cookieverf)); |
| 164 | if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) { | 175 | if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) { |
| 165 | nfs_fscache_invalidate(inode); | 176 | nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR |
| 166 | nfsi->cache_validity |= NFS_INO_INVALID_ATTR | ||
| 167 | | NFS_INO_INVALID_DATA | 177 | | NFS_INO_INVALID_DATA |
| 168 | | NFS_INO_INVALID_ACCESS | 178 | | NFS_INO_INVALID_ACCESS |
| 169 | | NFS_INO_INVALID_ACL | 179 | | NFS_INO_INVALID_ACL |
| 170 | | NFS_INO_REVAL_PAGECACHE; | 180 | | NFS_INO_REVAL_PAGECACHE); |
| 171 | } else | 181 | } else |
| 172 | nfsi->cache_validity |= NFS_INO_INVALID_ATTR | 182 | nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR |
| 173 | | NFS_INO_INVALID_ACCESS | 183 | | NFS_INO_INVALID_ACCESS |
| 174 | | NFS_INO_INVALID_ACL | 184 | | NFS_INO_INVALID_ACL |
| 175 | | NFS_INO_REVAL_PAGECACHE; | 185 | | NFS_INO_REVAL_PAGECACHE); |
| 176 | nfs_zap_label_cache_locked(nfsi); | 186 | nfs_zap_label_cache_locked(nfsi); |
| 177 | } | 187 | } |
| 178 | 188 | ||
| @@ -187,8 +197,7 @@ void nfs_zap_mapping(struct inode *inode, struct address_space *mapping) | |||
| 187 | { | 197 | { |
| 188 | if (mapping->nrpages != 0) { | 198 | if (mapping->nrpages != 0) { |
| 189 | spin_lock(&inode->i_lock); | 199 | spin_lock(&inode->i_lock); |
| 190 | NFS_I(inode)->cache_validity |= NFS_INO_INVALID_DATA; | 200 | nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA); |
| 191 | nfs_fscache_invalidate(inode); | ||
| 192 | spin_unlock(&inode->i_lock); | 201 | spin_unlock(&inode->i_lock); |
| 193 | } | 202 | } |
| 194 | } | 203 | } |
| @@ -209,7 +218,7 @@ EXPORT_SYMBOL_GPL(nfs_zap_acl_cache); | |||
| 209 | void nfs_invalidate_atime(struct inode *inode) | 218 | void nfs_invalidate_atime(struct inode *inode) |
| 210 | { | 219 | { |
| 211 | spin_lock(&inode->i_lock); | 220 | spin_lock(&inode->i_lock); |
| 212 | NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME; | 221 | nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATIME); |
| 213 | spin_unlock(&inode->i_lock); | 222 | spin_unlock(&inode->i_lock); |
| 214 | } | 223 | } |
| 215 | EXPORT_SYMBOL_GPL(nfs_invalidate_atime); | 224 | EXPORT_SYMBOL_GPL(nfs_invalidate_atime); |
| @@ -369,7 +378,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st | |||
| 369 | inode->i_mode = fattr->mode; | 378 | inode->i_mode = fattr->mode; |
| 370 | if ((fattr->valid & NFS_ATTR_FATTR_MODE) == 0 | 379 | if ((fattr->valid & NFS_ATTR_FATTR_MODE) == 0 |
| 371 | && nfs_server_capable(inode, NFS_CAP_MODE)) | 380 | && nfs_server_capable(inode, NFS_CAP_MODE)) |
| 372 | nfsi->cache_validity |= NFS_INO_INVALID_ATTR; | 381 | nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR); |
| 373 | /* Why so? Because we want revalidate for devices/FIFOs, and | 382 | /* Why so? Because we want revalidate for devices/FIFOs, and |
| 374 | * that's precisely what we have in nfs_file_inode_operations. | 383 | * that's precisely what we have in nfs_file_inode_operations. |
| 375 | */ | 384 | */ |
| @@ -415,36 +424,36 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st | |||
| 415 | if (fattr->valid & NFS_ATTR_FATTR_ATIME) | 424 | if (fattr->valid & NFS_ATTR_FATTR_ATIME) |
| 416 | inode->i_atime = fattr->atime; | 425 | inode->i_atime = fattr->atime; |
| 417 | else if (nfs_server_capable(inode, NFS_CAP_ATIME)) | 426 | else if (nfs_server_capable(inode, NFS_CAP_ATIME)) |
| 418 | nfsi->cache_validity |= NFS_INO_INVALID_ATTR; | 427 | nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR); |
| 419 | if (fattr->valid & NFS_ATTR_FATTR_MTIME) | 428 | if (fattr->valid & NFS_ATTR_FATTR_MTIME) |
| 420 | inode->i_mtime = fattr->mtime; | 429 | inode->i_mtime = fattr->mtime; |
| 421 | else if (nfs_server_capable(inode, NFS_CAP_MTIME)) | 430 | else if (nfs_server_capable(inode, NFS_CAP_MTIME)) |
| 422 | nfsi->cache_validity |= NFS_INO_INVALID_ATTR; | 431 | nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR); |
| 423 | if (fattr->valid & NFS_ATTR_FATTR_CTIME) | 432 | if (fattr->valid & NFS_ATTR_FATTR_CTIME) |
| 424 | inode->i_ctime = fattr->ctime; | 433 | inode->i_ctime = fattr->ctime; |
| 425 | else if (nfs_server_capable(inode, NFS_CAP_CTIME)) | 434 | else if (nfs_server_capable(inode, NFS_CAP_CTIME)) |
| 426 | nfsi->cache_validity |= NFS_INO_INVALID_ATTR; | 435 | nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR); |
| 427 | if (fattr->valid & NFS_ATTR_FATTR_CHANGE) | 436 | if (fattr->valid & NFS_ATTR_FATTR_CHANGE) |
| 428 | inode->i_version = fattr->change_attr; | 437 | inode->i_version = fattr->change_attr; |
| 429 | else if (nfs_server_capable(inode, NFS_CAP_CHANGE_ATTR)) | 438 | else if (nfs_server_capable(inode, NFS_CAP_CHANGE_ATTR)) |
| 430 | nfsi->cache_validity |= NFS_INO_INVALID_ATTR; | 439 | nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR); |
| 431 | if (fattr->valid & NFS_ATTR_FATTR_SIZE) | 440 | if (fattr->valid & NFS_ATTR_FATTR_SIZE) |
| 432 | inode->i_size = nfs_size_to_loff_t(fattr->size); | 441 | inode->i_size = nfs_size_to_loff_t(fattr->size); |
| 433 | else | 442 | else |
| 434 | nfsi->cache_validity |= NFS_INO_INVALID_ATTR | 443 | nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR |
| 435 | | NFS_INO_REVAL_PAGECACHE; | 444 | | NFS_INO_REVAL_PAGECACHE); |
| 436 | if (fattr->valid & NFS_ATTR_FATTR_NLINK) | 445 | if (fattr->valid & NFS_ATTR_FATTR_NLINK) |
| 437 | set_nlink(inode, fattr->nlink); | 446 | set_nlink(inode, fattr->nlink); |
| 438 | else if (nfs_server_capable(inode, NFS_CAP_NLINK)) | 447 | else if (nfs_server_capable(inode, NFS_CAP_NLINK)) |
| 439 | nfsi->cache_validity |= NFS_INO_INVALID_ATTR; | 448 | nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR); |
| 440 | if (fattr->valid & NFS_ATTR_FATTR_OWNER) | 449 | if (fattr->valid & NFS_ATTR_FATTR_OWNER) |
| 441 | inode->i_uid = fattr->uid; | 450 | inode->i_uid = fattr->uid; |
| 442 | else if (nfs_server_capable(inode, NFS_CAP_OWNER)) | 451 | else if (nfs_server_capable(inode, NFS_CAP_OWNER)) |
| 443 | nfsi->cache_validity |= NFS_INO_INVALID_ATTR; | 452 | nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR); |
| 444 | if (fattr->valid & NFS_ATTR_FATTR_GROUP) | 453 | if (fattr->valid & NFS_ATTR_FATTR_GROUP) |
| 445 | inode->i_gid = fattr->gid; | 454 | inode->i_gid = fattr->gid; |
| 446 | else if (nfs_server_capable(inode, NFS_CAP_OWNER_GROUP)) | 455 | else if (nfs_server_capable(inode, NFS_CAP_OWNER_GROUP)) |
| 447 | nfsi->cache_validity |= NFS_INO_INVALID_ATTR; | 456 | nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR); |
| 448 | if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED) | 457 | if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED) |
| 449 | inode->i_blocks = fattr->du.nfs2.blocks; | 458 | inode->i_blocks = fattr->du.nfs2.blocks; |
| 450 | if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) { | 459 | if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) { |
| @@ -550,6 +559,9 @@ static int nfs_vmtruncate(struct inode * inode, loff_t offset) | |||
| 550 | 559 | ||
| 551 | spin_lock(&inode->i_lock); | 560 | spin_lock(&inode->i_lock); |
| 552 | i_size_write(inode, offset); | 561 | i_size_write(inode, offset); |
| 562 | /* Optimisation */ | ||
| 563 | if (offset == 0) | ||
| 564 | NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_DATA; | ||
| 553 | spin_unlock(&inode->i_lock); | 565 | spin_unlock(&inode->i_lock); |
| 554 | 566 | ||
| 555 | truncate_pagecache(inode, offset); | 567 | truncate_pagecache(inode, offset); |
| @@ -578,7 +590,8 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr) | |||
| 578 | inode->i_uid = attr->ia_uid; | 590 | inode->i_uid = attr->ia_uid; |
| 579 | if ((attr->ia_valid & ATTR_GID) != 0) | 591 | if ((attr->ia_valid & ATTR_GID) != 0) |
| 580 | inode->i_gid = attr->ia_gid; | 592 | inode->i_gid = attr->ia_gid; |
| 581 | NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; | 593 | nfs_set_cache_invalid(inode, NFS_INO_INVALID_ACCESS |
| 594 | | NFS_INO_INVALID_ACL); | ||
| 582 | spin_unlock(&inode->i_lock); | 595 | spin_unlock(&inode->i_lock); |
| 583 | } | 596 | } |
| 584 | if ((attr->ia_valid & ATTR_SIZE) != 0) { | 597 | if ((attr->ia_valid & ATTR_SIZE) != 0) { |
| @@ -1101,7 +1114,7 @@ static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr | |||
| 1101 | && inode->i_version == fattr->pre_change_attr) { | 1114 | && inode->i_version == fattr->pre_change_attr) { |
| 1102 | inode->i_version = fattr->change_attr; | 1115 | inode->i_version = fattr->change_attr; |
| 1103 | if (S_ISDIR(inode->i_mode)) | 1116 | if (S_ISDIR(inode->i_mode)) |
| 1104 | nfsi->cache_validity |= NFS_INO_INVALID_DATA; | 1117 | nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA); |
| 1105 | ret |= NFS_INO_INVALID_ATTR; | 1118 | ret |= NFS_INO_INVALID_ATTR; |
| 1106 | } | 1119 | } |
| 1107 | /* If we have atomic WCC data, we may update some attributes */ | 1120 | /* If we have atomic WCC data, we may update some attributes */ |
| @@ -1117,7 +1130,7 @@ static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr | |||
| 1117 | && timespec_equal(&inode->i_mtime, &fattr->pre_mtime)) { | 1130 | && timespec_equal(&inode->i_mtime, &fattr->pre_mtime)) { |
| 1118 | memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); | 1131 | memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); |
| 1119 | if (S_ISDIR(inode->i_mode)) | 1132 | if (S_ISDIR(inode->i_mode)) |
| 1120 | nfsi->cache_validity |= NFS_INO_INVALID_DATA; | 1133 | nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA); |
| 1121 | ret |= NFS_INO_INVALID_ATTR; | 1134 | ret |= NFS_INO_INVALID_ATTR; |
| 1122 | } | 1135 | } |
| 1123 | if ((fattr->valid & NFS_ATTR_FATTR_PRESIZE) | 1136 | if ((fattr->valid & NFS_ATTR_FATTR_PRESIZE) |
| @@ -1128,9 +1141,6 @@ static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr | |||
| 1128 | ret |= NFS_INO_INVALID_ATTR; | 1141 | ret |= NFS_INO_INVALID_ATTR; |
| 1129 | } | 1142 | } |
| 1130 | 1143 | ||
| 1131 | if (nfsi->cache_validity & NFS_INO_INVALID_DATA) | ||
| 1132 | nfs_fscache_invalidate(inode); | ||
| 1133 | |||
| 1134 | return ret; | 1144 | return ret; |
| 1135 | } | 1145 | } |
| 1136 | 1146 | ||
| @@ -1189,7 +1199,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat | |||
| 1189 | invalid |= NFS_INO_INVALID_ATIME; | 1199 | invalid |= NFS_INO_INVALID_ATIME; |
| 1190 | 1200 | ||
| 1191 | if (invalid != 0) | 1201 | if (invalid != 0) |
| 1192 | nfsi->cache_validity |= invalid; | 1202 | nfs_set_cache_invalid(inode, invalid); |
| 1193 | 1203 | ||
| 1194 | nfsi->read_cache_jiffies = fattr->time_start; | 1204 | nfsi->read_cache_jiffies = fattr->time_start; |
| 1195 | return 0; | 1205 | return 0; |
| @@ -1402,13 +1412,11 @@ EXPORT_SYMBOL_GPL(nfs_refresh_inode); | |||
| 1402 | 1412 | ||
| 1403 | static int nfs_post_op_update_inode_locked(struct inode *inode, struct nfs_fattr *fattr) | 1413 | static int nfs_post_op_update_inode_locked(struct inode *inode, struct nfs_fattr *fattr) |
| 1404 | { | 1414 | { |
| 1405 | struct nfs_inode *nfsi = NFS_I(inode); | 1415 | unsigned long invalid = NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; |
| 1406 | 1416 | ||
| 1407 | nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; | 1417 | if (S_ISDIR(inode->i_mode)) |
| 1408 | if (S_ISDIR(inode->i_mode)) { | 1418 | invalid |= NFS_INO_INVALID_DATA; |
| 1409 | nfsi->cache_validity |= NFS_INO_INVALID_DATA; | 1419 | nfs_set_cache_invalid(inode, invalid); |
| 1410 | nfs_fscache_invalidate(inode); | ||
| 1411 | } | ||
| 1412 | if ((fattr->valid & NFS_ATTR_FATTR) == 0) | 1420 | if ((fattr->valid & NFS_ATTR_FATTR) == 0) |
| 1413 | return 0; | 1421 | return 0; |
| 1414 | return nfs_refresh_inode_locked(inode, fattr); | 1422 | return nfs_refresh_inode_locked(inode, fattr); |
| @@ -1601,6 +1609,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
| 1601 | if ((nfsi->npages == 0) || new_isize > cur_isize) { | 1609 | if ((nfsi->npages == 0) || new_isize > cur_isize) { |
| 1602 | i_size_write(inode, new_isize); | 1610 | i_size_write(inode, new_isize); |
| 1603 | invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; | 1611 | invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; |
| 1612 | invalid &= ~NFS_INO_REVAL_PAGECACHE; | ||
| 1604 | } | 1613 | } |
| 1605 | dprintk("NFS: isize change on server for file %s/%ld " | 1614 | dprintk("NFS: isize change on server for file %s/%ld " |
| 1606 | "(%Ld to %Ld)\n", | 1615 | "(%Ld to %Ld)\n", |
| @@ -1702,10 +1711,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
| 1702 | invalid &= ~NFS_INO_INVALID_DATA; | 1711 | invalid &= ~NFS_INO_INVALID_DATA; |
| 1703 | if (!NFS_PROTO(inode)->have_delegation(inode, FMODE_READ) || | 1712 | if (!NFS_PROTO(inode)->have_delegation(inode, FMODE_READ) || |
| 1704 | (save_cache_validity & NFS_INO_REVAL_FORCED)) | 1713 | (save_cache_validity & NFS_INO_REVAL_FORCED)) |
| 1705 | nfsi->cache_validity |= invalid; | 1714 | nfs_set_cache_invalid(inode, invalid); |
| 1706 | |||
| 1707 | if (invalid & NFS_INO_INVALID_DATA) | ||
| 1708 | nfs_fscache_invalidate(inode); | ||
| 1709 | 1715 | ||
| 1710 | return 0; | 1716 | return 0; |
| 1711 | out_err: | 1717 | out_err: |
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 8b69cba1bb04..82ddbf46660e 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h | |||
| @@ -327,16 +327,14 @@ int nfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *) | |||
| 327 | int nfs_file_fsync_commit(struct file *, loff_t, loff_t, int); | 327 | int nfs_file_fsync_commit(struct file *, loff_t, loff_t, int); |
| 328 | loff_t nfs_file_llseek(struct file *, loff_t, int); | 328 | loff_t nfs_file_llseek(struct file *, loff_t, int); |
| 329 | int nfs_file_flush(struct file *, fl_owner_t); | 329 | int nfs_file_flush(struct file *, fl_owner_t); |
| 330 | ssize_t nfs_file_read(struct kiocb *, const struct iovec *, unsigned long, loff_t); | 330 | ssize_t nfs_file_read(struct kiocb *, struct iov_iter *); |
| 331 | ssize_t nfs_file_splice_read(struct file *, loff_t *, struct pipe_inode_info *, | 331 | ssize_t nfs_file_splice_read(struct file *, loff_t *, struct pipe_inode_info *, |
| 332 | size_t, unsigned int); | 332 | size_t, unsigned int); |
| 333 | int nfs_file_mmap(struct file *, struct vm_area_struct *); | 333 | int nfs_file_mmap(struct file *, struct vm_area_struct *); |
| 334 | ssize_t nfs_file_write(struct kiocb *, const struct iovec *, unsigned long, loff_t); | 334 | ssize_t nfs_file_write(struct kiocb *, struct iov_iter *); |
| 335 | int nfs_file_release(struct inode *, struct file *); | 335 | int nfs_file_release(struct inode *, struct file *); |
| 336 | int nfs_lock(struct file *, int, struct file_lock *); | 336 | int nfs_lock(struct file *, int, struct file_lock *); |
| 337 | int nfs_flock(struct file *, int, struct file_lock *); | 337 | int nfs_flock(struct file *, int, struct file_lock *); |
| 338 | ssize_t nfs_file_splice_write(struct pipe_inode_info *, struct file *, loff_t *, | ||
| 339 | size_t, unsigned int); | ||
| 340 | int nfs_check_flags(int); | 338 | int nfs_check_flags(int); |
| 341 | int nfs_setlease(struct file *, long, struct file_lock **); | 339 | int nfs_setlease(struct file *, long, struct file_lock **); |
| 342 | 340 | ||
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index f63cb87cd730..ba2affa51941 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h | |||
| @@ -230,7 +230,7 @@ int nfs_atomic_open(struct inode *, struct dentry *, struct file *, | |||
| 230 | extern struct file_system_type nfs4_fs_type; | 230 | extern struct file_system_type nfs4_fs_type; |
| 231 | 231 | ||
| 232 | /* nfs4namespace.c */ | 232 | /* nfs4namespace.c */ |
| 233 | struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *, struct inode *, struct qstr *); | 233 | struct rpc_clnt *nfs4_negotiate_security(struct rpc_clnt *, struct inode *, struct qstr *); |
| 234 | struct vfsmount *nfs4_submount(struct nfs_server *, struct dentry *, | 234 | struct vfsmount *nfs4_submount(struct nfs_server *, struct dentry *, |
| 235 | struct nfs_fh *, struct nfs_fattr *); | 235 | struct nfs_fh *, struct nfs_fattr *); |
| 236 | int nfs4_replace_transport(struct nfs_server *server, | 236 | int nfs4_replace_transport(struct nfs_server *server, |
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index 464db9dd6318..a816f0627a6c 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c | |||
| @@ -117,10 +117,10 @@ nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) | |||
| 117 | 117 | ||
| 118 | const struct file_operations nfs4_file_operations = { | 118 | const struct file_operations nfs4_file_operations = { |
| 119 | .llseek = nfs_file_llseek, | 119 | .llseek = nfs_file_llseek, |
| 120 | .read = do_sync_read, | 120 | .read = new_sync_read, |
| 121 | .write = do_sync_write, | 121 | .write = new_sync_write, |
| 122 | .aio_read = nfs_file_read, | 122 | .read_iter = nfs_file_read, |
| 123 | .aio_write = nfs_file_write, | 123 | .write_iter = nfs_file_write, |
| 124 | .mmap = nfs_file_mmap, | 124 | .mmap = nfs_file_mmap, |
| 125 | .open = nfs4_file_open, | 125 | .open = nfs4_file_open, |
| 126 | .flush = nfs_file_flush, | 126 | .flush = nfs_file_flush, |
| @@ -129,7 +129,7 @@ const struct file_operations nfs4_file_operations = { | |||
| 129 | .lock = nfs_lock, | 129 | .lock = nfs_lock, |
| 130 | .flock = nfs_flock, | 130 | .flock = nfs_flock, |
| 131 | .splice_read = nfs_file_splice_read, | 131 | .splice_read = nfs_file_splice_read, |
| 132 | .splice_write = nfs_file_splice_write, | 132 | .splice_write = iter_file_splice_write, |
| 133 | .check_flags = nfs_check_flags, | 133 | .check_flags = nfs_check_flags, |
| 134 | .setlease = nfs_setlease, | 134 | .setlease = nfs_setlease, |
| 135 | }; | 135 | }; |
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index 3d5dbf80d46a..3d83cb1fdc70 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c | |||
| @@ -139,16 +139,22 @@ static size_t nfs_parse_server_name(char *string, size_t len, | |||
| 139 | * @server: NFS server struct | 139 | * @server: NFS server struct |
| 140 | * @flavors: List of security tuples returned by SECINFO procedure | 140 | * @flavors: List of security tuples returned by SECINFO procedure |
| 141 | * | 141 | * |
| 142 | * Return the pseudoflavor of the first security mechanism in | 142 | * Return an rpc client that uses the first security mechanism in |
| 143 | * "flavors" that is locally supported. Return RPC_AUTH_UNIX if | 143 | * "flavors" that is locally supported. The "flavors" array |
| 144 | * no matching flavor is found in the array. The "flavors" array | ||
| 145 | * is searched in the order returned from the server, per RFC 3530 | 144 | * is searched in the order returned from the server, per RFC 3530 |
| 146 | * recommendation. | 145 | * recommendation and each flavor is checked for membership in the |
| 146 | * sec= mount option list if it exists. | ||
| 147 | * | ||
| 148 | * Return -EPERM if no matching flavor is found in the array. | ||
| 149 | * | ||
| 150 | * Please call rpc_shutdown_client() when you are done with this rpc client. | ||
| 151 | * | ||
| 147 | */ | 152 | */ |
| 148 | static rpc_authflavor_t nfs_find_best_sec(struct nfs_server *server, | 153 | static struct rpc_clnt *nfs_find_best_sec(struct rpc_clnt *clnt, |
| 154 | struct nfs_server *server, | ||
| 149 | struct nfs4_secinfo_flavors *flavors) | 155 | struct nfs4_secinfo_flavors *flavors) |
| 150 | { | 156 | { |
| 151 | rpc_authflavor_t pseudoflavor; | 157 | rpc_authflavor_t pflavor; |
| 152 | struct nfs4_secinfo4 *secinfo; | 158 | struct nfs4_secinfo4 *secinfo; |
| 153 | unsigned int i; | 159 | unsigned int i; |
| 154 | 160 | ||
| @@ -159,62 +165,73 @@ static rpc_authflavor_t nfs_find_best_sec(struct nfs_server *server, | |||
| 159 | case RPC_AUTH_NULL: | 165 | case RPC_AUTH_NULL: |
| 160 | case RPC_AUTH_UNIX: | 166 | case RPC_AUTH_UNIX: |
| 161 | case RPC_AUTH_GSS: | 167 | case RPC_AUTH_GSS: |
| 162 | pseudoflavor = rpcauth_get_pseudoflavor(secinfo->flavor, | 168 | pflavor = rpcauth_get_pseudoflavor(secinfo->flavor, |
| 163 | &secinfo->flavor_info); | 169 | &secinfo->flavor_info); |
| 164 | /* make sure pseudoflavor matches sec= mount opt */ | 170 | /* does the pseudoflavor match a sec= mount opt? */ |
| 165 | if (pseudoflavor != RPC_AUTH_MAXFLAVOR && | 171 | if (pflavor != RPC_AUTH_MAXFLAVOR && |
| 166 | nfs_auth_info_match(&server->auth_info, | 172 | nfs_auth_info_match(&server->auth_info, pflavor)) { |
| 167 | pseudoflavor)) | 173 | struct rpc_clnt *new; |
| 168 | return pseudoflavor; | 174 | struct rpc_cred *cred; |
| 169 | break; | 175 | |
| 176 | /* Cloning creates an rpc_auth for the flavor */ | ||
| 177 | new = rpc_clone_client_set_auth(clnt, pflavor); | ||
| 178 | if (IS_ERR(new)) | ||
| 179 | continue; | ||
| 180 | /** | ||
| 181 | * Check that the user actually can use the | ||
| 182 | * flavor. This is mostly for RPC_AUTH_GSS | ||
| 183 | * where cr_init obtains a gss context | ||
| 184 | */ | ||
| 185 | cred = rpcauth_lookupcred(new->cl_auth, 0); | ||
| 186 | if (IS_ERR(cred)) { | ||
| 187 | rpc_shutdown_client(new); | ||
| 188 | continue; | ||
| 189 | } | ||
| 190 | put_rpccred(cred); | ||
| 191 | return new; | ||
| 192 | } | ||
| 170 | } | 193 | } |
| 171 | } | 194 | } |
| 172 | 195 | return ERR_PTR(-EPERM); | |
| 173 | /* if there were any sec= options then nothing matched */ | ||
| 174 | if (server->auth_info.flavor_len > 0) | ||
| 175 | return -EPERM; | ||
| 176 | |||
| 177 | return RPC_AUTH_UNIX; | ||
| 178 | } | 196 | } |
| 179 | 197 | ||
| 180 | static rpc_authflavor_t nfs4_negotiate_security(struct inode *inode, struct qstr *name) | 198 | /** |
| 199 | * nfs4_negotiate_security - in response to an NFS4ERR_WRONGSEC on lookup, | ||
| 200 | * return an rpc_clnt that uses the best available security flavor with | ||
| 201 | * respect to the secinfo flavor list and the sec= mount options. | ||
| 202 | * | ||
| 203 | * @clnt: RPC client to clone | ||
| 204 | * @inode: directory inode | ||
| 205 | * @name: lookup name | ||
| 206 | * | ||
| 207 | * Please call rpc_shutdown_client() when you are done with this rpc client. | ||
| 208 | */ | ||
| 209 | struct rpc_clnt * | ||
| 210 | nfs4_negotiate_security(struct rpc_clnt *clnt, struct inode *inode, | ||
| 211 | struct qstr *name) | ||
| 181 | { | 212 | { |
| 182 | struct page *page; | 213 | struct page *page; |
| 183 | struct nfs4_secinfo_flavors *flavors; | 214 | struct nfs4_secinfo_flavors *flavors; |
| 184 | rpc_authflavor_t flavor; | 215 | struct rpc_clnt *new; |
| 185 | int err; | 216 | int err; |
| 186 | 217 | ||
| 187 | page = alloc_page(GFP_KERNEL); | 218 | page = alloc_page(GFP_KERNEL); |
| 188 | if (!page) | 219 | if (!page) |
| 189 | return -ENOMEM; | 220 | return ERR_PTR(-ENOMEM); |
| 221 | |||
| 190 | flavors = page_address(page); | 222 | flavors = page_address(page); |
| 191 | 223 | ||
| 192 | err = nfs4_proc_secinfo(inode, name, flavors); | 224 | err = nfs4_proc_secinfo(inode, name, flavors); |
| 193 | if (err < 0) { | 225 | if (err < 0) { |
| 194 | flavor = err; | 226 | new = ERR_PTR(err); |
| 195 | goto out; | 227 | goto out; |
| 196 | } | 228 | } |
| 197 | 229 | ||
| 198 | flavor = nfs_find_best_sec(NFS_SERVER(inode), flavors); | 230 | new = nfs_find_best_sec(clnt, NFS_SERVER(inode), flavors); |
| 199 | 231 | ||
| 200 | out: | 232 | out: |
| 201 | put_page(page); | 233 | put_page(page); |
| 202 | return flavor; | 234 | return new; |
| 203 | } | ||
| 204 | |||
| 205 | /* | ||
| 206 | * Please call rpc_shutdown_client() when you are done with this client. | ||
| 207 | */ | ||
| 208 | struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *clnt, struct inode *inode, | ||
| 209 | struct qstr *name) | ||
| 210 | { | ||
| 211 | rpc_authflavor_t flavor; | ||
| 212 | |||
| 213 | flavor = nfs4_negotiate_security(inode, name); | ||
| 214 | if ((int)flavor < 0) | ||
| 215 | return ERR_PTR((int)flavor); | ||
| 216 | |||
| 217 | return rpc_clone_client_set_auth(clnt, flavor); | ||
| 218 | } | 235 | } |
| 219 | 236 | ||
| 220 | static struct vfsmount *try_location(struct nfs_clone_mount *mountdata, | 237 | static struct vfsmount *try_location(struct nfs_clone_mount *mountdata, |
| @@ -397,11 +414,6 @@ struct vfsmount *nfs4_submount(struct nfs_server *server, struct dentry *dentry, | |||
| 397 | 414 | ||
| 398 | if (client->cl_auth->au_flavor != flavor) | 415 | if (client->cl_auth->au_flavor != flavor) |
| 399 | flavor = client->cl_auth->au_flavor; | 416 | flavor = client->cl_auth->au_flavor; |
| 400 | else { | ||
| 401 | rpc_authflavor_t new = nfs4_negotiate_security(dir, name); | ||
| 402 | if ((int)new >= 0) | ||
| 403 | flavor = new; | ||
| 404 | } | ||
| 405 | mnt = nfs_do_submount(dentry, fh, fattr, flavor); | 417 | mnt = nfs_do_submount(dentry, fh, fattr, flavor); |
| 406 | out: | 418 | out: |
| 407 | rpc_shutdown_client(client); | 419 | rpc_shutdown_client(client); |
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 285ad5334018..4bf3d97cc5a0 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c | |||
| @@ -3247,7 +3247,7 @@ static int nfs4_proc_lookup_common(struct rpc_clnt **clnt, struct inode *dir, | |||
| 3247 | err = -EPERM; | 3247 | err = -EPERM; |
| 3248 | if (client != *clnt) | 3248 | if (client != *clnt) |
| 3249 | goto out; | 3249 | goto out; |
| 3250 | client = nfs4_create_sec_client(client, dir, name); | 3250 | client = nfs4_negotiate_security(client, dir, name); |
| 3251 | if (IS_ERR(client)) | 3251 | if (IS_ERR(client)) |
| 3252 | return PTR_ERR(client); | 3252 | return PTR_ERR(client); |
| 3253 | 3253 | ||
diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 3ee5af4e738e..98ff061ccaf3 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c | |||
| @@ -934,12 +934,14 @@ static bool nfs_write_pageuptodate(struct page *page, struct inode *inode) | |||
| 934 | 934 | ||
| 935 | if (nfs_have_delegated_attributes(inode)) | 935 | if (nfs_have_delegated_attributes(inode)) |
| 936 | goto out; | 936 | goto out; |
| 937 | if (nfsi->cache_validity & (NFS_INO_INVALID_DATA|NFS_INO_REVAL_PAGECACHE)) | 937 | if (nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE) |
| 938 | return false; | 938 | return false; |
| 939 | smp_rmb(); | 939 | smp_rmb(); |
| 940 | if (test_bit(NFS_INO_INVALIDATING, &nfsi->flags)) | 940 | if (test_bit(NFS_INO_INVALIDATING, &nfsi->flags)) |
| 941 | return false; | 941 | return false; |
| 942 | out: | 942 | out: |
| 943 | if (nfsi->cache_validity & NFS_INO_INVALID_DATA) | ||
| 944 | return false; | ||
| 943 | return PageUptodate(page) != 0; | 945 | return PageUptodate(page) != 0; |
| 944 | } | 946 | } |
| 945 | 947 | ||
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 6851b003f2a4..8f029db5d271 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c | |||
| @@ -617,15 +617,6 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
| 617 | 617 | ||
| 618 | switch (create->cr_type) { | 618 | switch (create->cr_type) { |
| 619 | case NF4LNK: | 619 | case NF4LNK: |
| 620 | /* ugh! we have to null-terminate the linktext, or | ||
| 621 | * vfs_symlink() will choke. it is always safe to | ||
| 622 | * null-terminate by brute force, since at worst we | ||
| 623 | * will overwrite the first byte of the create namelen | ||
| 624 | * in the XDR buffer, which has already been extracted | ||
| 625 | * during XDR decode. | ||
| 626 | */ | ||
| 627 | create->cr_linkname[create->cr_linklen] = 0; | ||
| 628 | |||
| 629 | status = nfsd_symlink(rqstp, &cstate->current_fh, | 620 | status = nfsd_symlink(rqstp, &cstate->current_fh, |
| 630 | create->cr_name, create->cr_namelen, | 621 | create->cr_name, create->cr_namelen, |
| 631 | create->cr_linkname, create->cr_linklen, | 622 | create->cr_linkname, create->cr_linklen, |
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index c0d45cec9958..2204e1fe5725 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c | |||
| @@ -41,6 +41,7 @@ | |||
| 41 | #include <linux/ratelimit.h> | 41 | #include <linux/ratelimit.h> |
| 42 | #include <linux/sunrpc/svcauth_gss.h> | 42 | #include <linux/sunrpc/svcauth_gss.h> |
| 43 | #include <linux/sunrpc/addr.h> | 43 | #include <linux/sunrpc/addr.h> |
| 44 | #include <linux/hash.h> | ||
| 44 | #include "xdr4.h" | 45 | #include "xdr4.h" |
| 45 | #include "xdr4cb.h" | 46 | #include "xdr4cb.h" |
| 46 | #include "vfs.h" | 47 | #include "vfs.h" |
| @@ -364,6 +365,79 @@ static struct nfs4_ol_stateid * nfs4_alloc_stateid(struct nfs4_client *clp) | |||
| 364 | return openlockstateid(nfs4_alloc_stid(clp, stateid_slab)); | 365 | return openlockstateid(nfs4_alloc_stid(clp, stateid_slab)); |
| 365 | } | 366 | } |
| 366 | 367 | ||
| 368 | /* | ||
| 369 | * When we recall a delegation, we should be careful not to hand it | ||
| 370 | * out again straight away. | ||
| 371 | * To ensure this we keep a pair of bloom filters ('new' and 'old') | ||
| 372 | * in which the filehandles of recalled delegations are "stored". | ||
| 373 | * If a filehandle appear in either filter, a delegation is blocked. | ||
| 374 | * When a delegation is recalled, the filehandle is stored in the "new" | ||
| 375 | * filter. | ||
| 376 | * Every 30 seconds we swap the filters and clear the "new" one, | ||
| 377 | * unless both are empty of course. | ||
| 378 | * | ||
| 379 | * Each filter is 256 bits. We hash the filehandle to 32bit and use the | ||
| 380 | * low 3 bytes as hash-table indices. | ||
| 381 | * | ||
| 382 | * 'state_lock', which is always held when block_delegations() is called, | ||
| 383 | * is used to manage concurrent access. Testing does not need the lock | ||
| 384 | * except when swapping the two filters. | ||
| 385 | */ | ||
| 386 | static struct bloom_pair { | ||
| 387 | int entries, old_entries; | ||
| 388 | time_t swap_time; | ||
| 389 | int new; /* index into 'set' */ | ||
| 390 | DECLARE_BITMAP(set[2], 256); | ||
| 391 | } blocked_delegations; | ||
| 392 | |||
| 393 | static int delegation_blocked(struct knfsd_fh *fh) | ||
| 394 | { | ||
| 395 | u32 hash; | ||
| 396 | struct bloom_pair *bd = &blocked_delegations; | ||
| 397 | |||
| 398 | if (bd->entries == 0) | ||
| 399 | return 0; | ||
| 400 | if (seconds_since_boot() - bd->swap_time > 30) { | ||
| 401 | spin_lock(&state_lock); | ||
| 402 | if (seconds_since_boot() - bd->swap_time > 30) { | ||
| 403 | bd->entries -= bd->old_entries; | ||
| 404 | bd->old_entries = bd->entries; | ||
| 405 | memset(bd->set[bd->new], 0, | ||
| 406 | sizeof(bd->set[0])); | ||
| 407 | bd->new = 1-bd->new; | ||
| 408 | bd->swap_time = seconds_since_boot(); | ||
| 409 | } | ||
| 410 | spin_unlock(&state_lock); | ||
| 411 | } | ||
| 412 | hash = arch_fast_hash(&fh->fh_base, fh->fh_size, 0); | ||
| 413 | if (test_bit(hash&255, bd->set[0]) && | ||
| 414 | test_bit((hash>>8)&255, bd->set[0]) && | ||
| 415 | test_bit((hash>>16)&255, bd->set[0])) | ||
| 416 | return 1; | ||
| 417 | |||
| 418 | if (test_bit(hash&255, bd->set[1]) && | ||
| 419 | test_bit((hash>>8)&255, bd->set[1]) && | ||
| 420 | test_bit((hash>>16)&255, bd->set[1])) | ||
| 421 | return 1; | ||
| 422 | |||
| 423 | return 0; | ||
| 424 | } | ||
| 425 | |||
| 426 | static void block_delegations(struct knfsd_fh *fh) | ||
| 427 | { | ||
| 428 | u32 hash; | ||
| 429 | struct bloom_pair *bd = &blocked_delegations; | ||
| 430 | |||
| 431 | hash = arch_fast_hash(&fh->fh_base, fh->fh_size, 0); | ||
| 432 | |||
| 433 | __set_bit(hash&255, bd->set[bd->new]); | ||
| 434 | __set_bit((hash>>8)&255, bd->set[bd->new]); | ||
| 435 | __set_bit((hash>>16)&255, bd->set[bd->new]); | ||
| 436 | if (bd->entries == 0) | ||
| 437 | bd->swap_time = seconds_since_boot(); | ||
| 438 | bd->entries += 1; | ||
| 439 | } | ||
| 440 | |||
| 367 | static struct nfs4_delegation * | 441 | static struct nfs4_delegation * |
| 368 | alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct svc_fh *current_fh) | 442 | alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct svc_fh *current_fh) |
| 369 | { | 443 | { |
| @@ -372,6 +446,8 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct sv | |||
| 372 | dprintk("NFSD alloc_init_deleg\n"); | 446 | dprintk("NFSD alloc_init_deleg\n"); |
| 373 | if (num_delegations > max_delegations) | 447 | if (num_delegations > max_delegations) |
| 374 | return NULL; | 448 | return NULL; |
| 449 | if (delegation_blocked(¤t_fh->fh_handle)) | ||
| 450 | return NULL; | ||
| 375 | dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab)); | 451 | dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab)); |
| 376 | if (dp == NULL) | 452 | if (dp == NULL) |
| 377 | return dp; | 453 | return dp; |
| @@ -2770,6 +2846,8 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp) | |||
| 2770 | /* Only place dl_time is set; protected by i_lock: */ | 2846 | /* Only place dl_time is set; protected by i_lock: */ |
| 2771 | dp->dl_time = get_seconds(); | 2847 | dp->dl_time = get_seconds(); |
| 2772 | 2848 | ||
| 2849 | block_delegations(&dp->dl_fh); | ||
| 2850 | |||
| 2773 | nfsd4_cb_recall(dp); | 2851 | nfsd4_cb_recall(dp); |
| 2774 | } | 2852 | } |
| 2775 | 2853 | ||
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 2d305a121f37..2fc7abebeb9b 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c | |||
| @@ -600,7 +600,18 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create | |||
| 600 | READ_BUF(4); | 600 | READ_BUF(4); |
| 601 | create->cr_linklen = be32_to_cpup(p++); | 601 | create->cr_linklen = be32_to_cpup(p++); |
| 602 | READ_BUF(create->cr_linklen); | 602 | READ_BUF(create->cr_linklen); |
| 603 | SAVEMEM(create->cr_linkname, create->cr_linklen); | 603 | /* |
| 604 | * The VFS will want a null-terminated string, and | ||
| 605 | * null-terminating in place isn't safe since this might | ||
| 606 | * end on a page boundary: | ||
| 607 | */ | ||
| 608 | create->cr_linkname = | ||
| 609 | kmalloc(create->cr_linklen + 1, GFP_KERNEL); | ||
| 610 | if (!create->cr_linkname) | ||
| 611 | return nfserr_jukebox; | ||
| 612 | memcpy(create->cr_linkname, p, create->cr_linklen); | ||
| 613 | create->cr_linkname[create->cr_linklen] = '\0'; | ||
| 614 | defer_free(argp, kfree, create->cr_linkname); | ||
| 604 | break; | 615 | break; |
| 605 | case NF4BLK: | 616 | case NF4BLK: |
| 606 | case NF4CHR: | 617 | case NF4CHR: |
| @@ -2687,6 +2698,7 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, | |||
| 2687 | nfserr = nfserr_toosmall; | 2698 | nfserr = nfserr_toosmall; |
| 2688 | goto fail; | 2699 | goto fail; |
| 2689 | case nfserr_noent: | 2700 | case nfserr_noent: |
| 2701 | xdr_truncate_encode(xdr, start_offset); | ||
| 2690 | goto skip_entry; | 2702 | goto skip_entry; |
| 2691 | default: | 2703 | default: |
| 2692 | /* | 2704 | /* |
| @@ -3266,7 +3278,7 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd | |||
| 3266 | 3278 | ||
| 3267 | wire_count = htonl(maxcount); | 3279 | wire_count = htonl(maxcount); |
| 3268 | write_bytes_to_xdr_buf(xdr->buf, length_offset, &wire_count, 4); | 3280 | write_bytes_to_xdr_buf(xdr->buf, length_offset, &wire_count, 4); |
| 3269 | xdr_truncate_encode(xdr, length_offset + 4 + maxcount); | 3281 | xdr_truncate_encode(xdr, length_offset + 4 + ALIGN(maxcount, 4)); |
| 3270 | if (maxcount & 3) | 3282 | if (maxcount & 3) |
| 3271 | write_bytes_to_xdr_buf(xdr->buf, length_offset + 4 + maxcount, | 3283 | write_bytes_to_xdr_buf(xdr->buf, length_offset + 4 + maxcount, |
| 3272 | &zero, 4 - (maxcount&3)); | 3284 | &zero, 4 - (maxcount&3)); |
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c index f3a82fbcae02..24978153c0c4 100644 --- a/fs/nilfs2/file.c +++ b/fs/nilfs2/file.c | |||
| @@ -152,10 +152,10 @@ static int nilfs_file_mmap(struct file *file, struct vm_area_struct *vma) | |||
| 152 | */ | 152 | */ |
| 153 | const struct file_operations nilfs_file_operations = { | 153 | const struct file_operations nilfs_file_operations = { |
| 154 | .llseek = generic_file_llseek, | 154 | .llseek = generic_file_llseek, |
| 155 | .read = do_sync_read, | 155 | .read = new_sync_read, |
| 156 | .write = do_sync_write, | 156 | .write = new_sync_write, |
| 157 | .aio_read = generic_file_aio_read, | 157 | .read_iter = generic_file_read_iter, |
| 158 | .aio_write = generic_file_aio_write, | 158 | .write_iter = generic_file_write_iter, |
| 159 | .unlocked_ioctl = nilfs_ioctl, | 159 | .unlocked_ioctl = nilfs_ioctl, |
| 160 | #ifdef CONFIG_COMPAT | 160 | #ifdef CONFIG_COMPAT |
| 161 | .compat_ioctl = nilfs_compat_ioctl, | 161 | .compat_ioctl = nilfs_compat_ioctl, |
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index b9c5726120e3..6252b173a465 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c | |||
| @@ -298,19 +298,20 @@ static int nilfs_write_end(struct file *file, struct address_space *mapping, | |||
| 298 | } | 298 | } |
| 299 | 299 | ||
| 300 | static ssize_t | 300 | static ssize_t |
| 301 | nilfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, | 301 | nilfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, |
| 302 | loff_t offset, unsigned long nr_segs) | 302 | loff_t offset) |
| 303 | { | 303 | { |
| 304 | struct file *file = iocb->ki_filp; | 304 | struct file *file = iocb->ki_filp; |
| 305 | struct address_space *mapping = file->f_mapping; | 305 | struct address_space *mapping = file->f_mapping; |
| 306 | struct inode *inode = file->f_mapping->host; | 306 | struct inode *inode = file->f_mapping->host; |
| 307 | size_t count = iov_iter_count(iter); | ||
| 307 | ssize_t size; | 308 | ssize_t size; |
| 308 | 309 | ||
| 309 | if (rw == WRITE) | 310 | if (rw == WRITE) |
| 310 | return 0; | 311 | return 0; |
| 311 | 312 | ||
| 312 | /* Needs synchronization with the cleaner */ | 313 | /* Needs synchronization with the cleaner */ |
| 313 | size = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, | 314 | size = blockdev_direct_IO(rw, iocb, inode, iter, offset, |
| 314 | nilfs_get_block); | 315 | nilfs_get_block); |
| 315 | 316 | ||
| 316 | /* | 317 | /* |
| @@ -319,7 +320,7 @@ nilfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, | |||
| 319 | */ | 320 | */ |
| 320 | if (unlikely((rw & WRITE) && size < 0)) { | 321 | if (unlikely((rw & WRITE) && size < 0)) { |
| 321 | loff_t isize = i_size_read(inode); | 322 | loff_t isize = i_size_read(inode); |
| 322 | loff_t end = offset + iov_length(iov, nr_segs); | 323 | loff_t end = offset + count; |
| 323 | 324 | ||
| 324 | if (end > isize) | 325 | if (end > isize) |
| 325 | nilfs_write_failed(mapping, end); | 326 | nilfs_write_failed(mapping, end); |
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index 86ddab916b66..5c9e2c81cb11 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c | |||
| @@ -2090,10 +2090,7 @@ static ssize_t ntfs_file_aio_write_nolock(struct kiocb *iocb, | |||
| 2090 | size_t count; /* after file limit checks */ | 2090 | size_t count; /* after file limit checks */ |
| 2091 | ssize_t written, err; | 2091 | ssize_t written, err; |
| 2092 | 2092 | ||
| 2093 | count = 0; | 2093 | count = iov_length(iov, nr_segs); |
| 2094 | err = generic_segment_checks(iov, &nr_segs, &count, VERIFY_READ); | ||
| 2095 | if (err) | ||
| 2096 | return err; | ||
| 2097 | pos = *ppos; | 2094 | pos = *ppos; |
| 2098 | /* We can write back this queue in page reclaim. */ | 2095 | /* We can write back this queue in page reclaim. */ |
| 2099 | current->backing_dev_info = mapping->backing_dev_info; | 2096 | current->backing_dev_info = mapping->backing_dev_info; |
| @@ -2202,8 +2199,8 @@ static int ntfs_file_fsync(struct file *filp, loff_t start, loff_t end, | |||
| 2202 | 2199 | ||
| 2203 | const struct file_operations ntfs_file_ops = { | 2200 | const struct file_operations ntfs_file_ops = { |
| 2204 | .llseek = generic_file_llseek, /* Seek inside file. */ | 2201 | .llseek = generic_file_llseek, /* Seek inside file. */ |
| 2205 | .read = do_sync_read, /* Read from file. */ | 2202 | .read = new_sync_read, /* Read from file. */ |
| 2206 | .aio_read = generic_file_aio_read, /* Async read from file. */ | 2203 | .read_iter = generic_file_read_iter, /* Async read from file. */ |
| 2207 | #ifdef NTFS_RW | 2204 | #ifdef NTFS_RW |
| 2208 | .write = do_sync_write, /* Write to file. */ | 2205 | .write = do_sync_write, /* Write to file. */ |
| 2209 | .aio_write = ntfs_file_aio_write, /* Async write to file. */ | 2206 | .aio_write = ntfs_file_aio_write, /* Async write to file. */ |
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index d310d12a9adc..4a231a166cf8 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c | |||
| @@ -599,9 +599,8 @@ static int ocfs2_releasepage(struct page *page, gfp_t wait) | |||
| 599 | 599 | ||
| 600 | static ssize_t ocfs2_direct_IO(int rw, | 600 | static ssize_t ocfs2_direct_IO(int rw, |
| 601 | struct kiocb *iocb, | 601 | struct kiocb *iocb, |
| 602 | const struct iovec *iov, | 602 | struct iov_iter *iter, |
| 603 | loff_t offset, | 603 | loff_t offset) |
| 604 | unsigned long nr_segs) | ||
| 605 | { | 604 | { |
| 606 | struct file *file = iocb->ki_filp; | 605 | struct file *file = iocb->ki_filp; |
| 607 | struct inode *inode = file_inode(file)->i_mapping->host; | 606 | struct inode *inode = file_inode(file)->i_mapping->host; |
| @@ -618,7 +617,7 @@ static ssize_t ocfs2_direct_IO(int rw, | |||
| 618 | return 0; | 617 | return 0; |
| 619 | 618 | ||
| 620 | return __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, | 619 | return __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, |
| 621 | iov, offset, nr_segs, | 620 | iter, offset, |
| 622 | ocfs2_direct_IO_get_blocks, | 621 | ocfs2_direct_IO_get_blocks, |
| 623 | ocfs2_dio_end_io, NULL, 0); | 622 | ocfs2_dio_end_io, NULL, 0); |
| 624 | } | 623 | } |
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h index a106b3f2b22a..fae17c640df3 100644 --- a/fs/ocfs2/dlm/dlmcommon.h +++ b/fs/ocfs2/dlm/dlmcommon.h | |||
| @@ -331,6 +331,7 @@ struct dlm_lock_resource | |||
| 331 | u16 state; | 331 | u16 state; |
| 332 | char lvb[DLM_LVB_LEN]; | 332 | char lvb[DLM_LVB_LEN]; |
| 333 | unsigned int inflight_locks; | 333 | unsigned int inflight_locks; |
| 334 | unsigned int inflight_assert_workers; | ||
| 334 | unsigned long refmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; | 335 | unsigned long refmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; |
| 335 | }; | 336 | }; |
| 336 | 337 | ||
| @@ -910,6 +911,9 @@ void dlm_lockres_drop_inflight_ref(struct dlm_ctxt *dlm, | |||
| 910 | void dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm, | 911 | void dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm, |
| 911 | struct dlm_lock_resource *res); | 912 | struct dlm_lock_resource *res); |
| 912 | 913 | ||
| 914 | void __dlm_lockres_grab_inflight_worker(struct dlm_ctxt *dlm, | ||
| 915 | struct dlm_lock_resource *res); | ||
| 916 | |||
| 913 | void dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock); | 917 | void dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock); |
| 914 | void dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock); | 918 | void dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock); |
| 915 | void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock); | 919 | void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock); |
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 3087a21d32f9..82abf0cc9a12 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c | |||
| @@ -581,6 +581,7 @@ static void dlm_init_lockres(struct dlm_ctxt *dlm, | |||
| 581 | atomic_set(&res->asts_reserved, 0); | 581 | atomic_set(&res->asts_reserved, 0); |
| 582 | res->migration_pending = 0; | 582 | res->migration_pending = 0; |
| 583 | res->inflight_locks = 0; | 583 | res->inflight_locks = 0; |
| 584 | res->inflight_assert_workers = 0; | ||
| 584 | 585 | ||
| 585 | res->dlm = dlm; | 586 | res->dlm = dlm; |
| 586 | 587 | ||
| @@ -683,6 +684,43 @@ void dlm_lockres_drop_inflight_ref(struct dlm_ctxt *dlm, | |||
| 683 | wake_up(&res->wq); | 684 | wake_up(&res->wq); |
| 684 | } | 685 | } |
| 685 | 686 | ||
| 687 | void __dlm_lockres_grab_inflight_worker(struct dlm_ctxt *dlm, | ||
| 688 | struct dlm_lock_resource *res) | ||
| 689 | { | ||
| 690 | assert_spin_locked(&res->spinlock); | ||
| 691 | res->inflight_assert_workers++; | ||
| 692 | mlog(0, "%s:%.*s: inflight assert worker++: now %u\n", | ||
| 693 | dlm->name, res->lockname.len, res->lockname.name, | ||
| 694 | res->inflight_assert_workers); | ||
| 695 | } | ||
| 696 | |||
| 697 | static void dlm_lockres_grab_inflight_worker(struct dlm_ctxt *dlm, | ||
| 698 | struct dlm_lock_resource *res) | ||
| 699 | { | ||
| 700 | spin_lock(&res->spinlock); | ||
| 701 | __dlm_lockres_grab_inflight_worker(dlm, res); | ||
| 702 | spin_unlock(&res->spinlock); | ||
| 703 | } | ||
| 704 | |||
| 705 | static void __dlm_lockres_drop_inflight_worker(struct dlm_ctxt *dlm, | ||
| 706 | struct dlm_lock_resource *res) | ||
| 707 | { | ||
| 708 | assert_spin_locked(&res->spinlock); | ||
| 709 | BUG_ON(res->inflight_assert_workers == 0); | ||
| 710 | res->inflight_assert_workers--; | ||
| 711 | mlog(0, "%s:%.*s: inflight assert worker--: now %u\n", | ||
| 712 | dlm->name, res->lockname.len, res->lockname.name, | ||
| 713 | res->inflight_assert_workers); | ||
| 714 | } | ||
| 715 | |||
| 716 | static void dlm_lockres_drop_inflight_worker(struct dlm_ctxt *dlm, | ||
| 717 | struct dlm_lock_resource *res) | ||
| 718 | { | ||
| 719 | spin_lock(&res->spinlock); | ||
| 720 | __dlm_lockres_drop_inflight_worker(dlm, res); | ||
| 721 | spin_unlock(&res->spinlock); | ||
| 722 | } | ||
| 723 | |||
| 686 | /* | 724 | /* |
| 687 | * lookup a lock resource by name. | 725 | * lookup a lock resource by name. |
| 688 | * may already exist in the hashtable. | 726 | * may already exist in the hashtable. |
| @@ -1603,7 +1641,8 @@ send_response: | |||
| 1603 | mlog(ML_ERROR, "failed to dispatch assert master work\n"); | 1641 | mlog(ML_ERROR, "failed to dispatch assert master work\n"); |
| 1604 | response = DLM_MASTER_RESP_ERROR; | 1642 | response = DLM_MASTER_RESP_ERROR; |
| 1605 | dlm_lockres_put(res); | 1643 | dlm_lockres_put(res); |
| 1606 | } | 1644 | } else |
| 1645 | dlm_lockres_grab_inflight_worker(dlm, res); | ||
| 1607 | } else { | 1646 | } else { |
| 1608 | if (res) | 1647 | if (res) |
| 1609 | dlm_lockres_put(res); | 1648 | dlm_lockres_put(res); |
| @@ -2118,6 +2157,8 @@ static void dlm_assert_master_worker(struct dlm_work_item *item, void *data) | |||
| 2118 | dlm_lockres_release_ast(dlm, res); | 2157 | dlm_lockres_release_ast(dlm, res); |
| 2119 | 2158 | ||
| 2120 | put: | 2159 | put: |
| 2160 | dlm_lockres_drop_inflight_worker(dlm, res); | ||
| 2161 | |||
| 2121 | dlm_lockres_put(res); | 2162 | dlm_lockres_put(res); |
| 2122 | 2163 | ||
| 2123 | mlog(0, "finished with dlm_assert_master_worker\n"); | 2164 | mlog(0, "finished with dlm_assert_master_worker\n"); |
| @@ -3088,11 +3129,15 @@ static int dlm_add_migration_mle(struct dlm_ctxt *dlm, | |||
| 3088 | /* remove it so that only one mle will be found */ | 3129 | /* remove it so that only one mle will be found */ |
| 3089 | __dlm_unlink_mle(dlm, tmp); | 3130 | __dlm_unlink_mle(dlm, tmp); |
| 3090 | __dlm_mle_detach_hb_events(dlm, tmp); | 3131 | __dlm_mle_detach_hb_events(dlm, tmp); |
| 3091 | ret = DLM_MIGRATE_RESPONSE_MASTERY_REF; | 3132 | if (tmp->type == DLM_MLE_MASTER) { |
| 3092 | mlog(0, "%s:%.*s: master=%u, newmaster=%u, " | 3133 | ret = DLM_MIGRATE_RESPONSE_MASTERY_REF; |
| 3093 | "telling master to get ref for cleared out mle " | 3134 | mlog(0, "%s:%.*s: master=%u, newmaster=%u, " |
| 3094 | "during migration\n", dlm->name, namelen, name, | 3135 | "telling master to get ref " |
| 3095 | master, new_master); | 3136 | "for cleared out mle during " |
| 3137 | "migration\n", dlm->name, | ||
| 3138 | namelen, name, master, | ||
| 3139 | new_master); | ||
| 3140 | } | ||
| 3096 | } | 3141 | } |
| 3097 | spin_unlock(&tmp->spinlock); | 3142 | spin_unlock(&tmp->spinlock); |
| 3098 | } | 3143 | } |
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 5de019437ea5..45067faf5695 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c | |||
| @@ -1708,7 +1708,8 @@ int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data, | |||
| 1708 | mlog_errno(-ENOMEM); | 1708 | mlog_errno(-ENOMEM); |
| 1709 | /* retry!? */ | 1709 | /* retry!? */ |
| 1710 | BUG(); | 1710 | BUG(); |
| 1711 | } | 1711 | } else |
| 1712 | __dlm_lockres_grab_inflight_worker(dlm, res); | ||
| 1712 | } else /* put.. incase we are not the master */ | 1713 | } else /* put.. incase we are not the master */ |
| 1713 | dlm_lockres_put(res); | 1714 | dlm_lockres_put(res); |
| 1714 | spin_unlock(&res->spinlock); | 1715 | spin_unlock(&res->spinlock); |
diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c index 9db869de829d..69aac6f088ad 100644 --- a/fs/ocfs2/dlm/dlmthread.c +++ b/fs/ocfs2/dlm/dlmthread.c | |||
| @@ -259,12 +259,15 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm, | |||
| 259 | * refs on it. */ | 259 | * refs on it. */ |
| 260 | unused = __dlm_lockres_unused(lockres); | 260 | unused = __dlm_lockres_unused(lockres); |
| 261 | if (!unused || | 261 | if (!unused || |
| 262 | (lockres->state & DLM_LOCK_RES_MIGRATING)) { | 262 | (lockres->state & DLM_LOCK_RES_MIGRATING) || |
| 263 | (lockres->inflight_assert_workers != 0)) { | ||
| 263 | mlog(0, "%s: res %.*s is in use or being remastered, " | 264 | mlog(0, "%s: res %.*s is in use or being remastered, " |
| 264 | "used %d, state %d\n", dlm->name, | 265 | "used %d, state %d, assert master workers %u\n", |
| 265 | lockres->lockname.len, lockres->lockname.name, | 266 | dlm->name, lockres->lockname.len, |
| 266 | !unused, lockres->state); | 267 | lockres->lockname.name, |
| 267 | list_move_tail(&dlm->purge_list, &lockres->purge); | 268 | !unused, lockres->state, |
| 269 | lockres->inflight_assert_workers); | ||
| 270 | list_move_tail(&lockres->purge, &dlm->purge_list); | ||
| 268 | spin_unlock(&lockres->spinlock); | 271 | spin_unlock(&lockres->spinlock); |
| 269 | continue; | 272 | continue; |
| 270 | } | 273 | } |
diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c index 5698b52cf5c9..2e3c9dbab68c 100644 --- a/fs/ocfs2/dlm/dlmunlock.c +++ b/fs/ocfs2/dlm/dlmunlock.c | |||
| @@ -191,7 +191,9 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm, | |||
| 191 | DLM_UNLOCK_CLEAR_CONVERT_TYPE); | 191 | DLM_UNLOCK_CLEAR_CONVERT_TYPE); |
| 192 | } else if (status == DLM_RECOVERING || | 192 | } else if (status == DLM_RECOVERING || |
| 193 | status == DLM_MIGRATING || | 193 | status == DLM_MIGRATING || |
| 194 | status == DLM_FORWARD) { | 194 | status == DLM_FORWARD || |
| 195 | status == DLM_NOLOCKMGR | ||
| 196 | ) { | ||
| 195 | /* must clear the actions because this unlock | 197 | /* must clear the actions because this unlock |
| 196 | * is about to be retried. cannot free or do | 198 | * is about to be retried. cannot free or do |
| 197 | * any list manipulation. */ | 199 | * any list manipulation. */ |
| @@ -200,7 +202,8 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm, | |||
| 200 | res->lockname.name, | 202 | res->lockname.name, |
| 201 | status==DLM_RECOVERING?"recovering": | 203 | status==DLM_RECOVERING?"recovering": |
| 202 | (status==DLM_MIGRATING?"migrating": | 204 | (status==DLM_MIGRATING?"migrating": |
| 203 | "forward")); | 205 | (status == DLM_FORWARD ? "forward" : |
| 206 | "nolockmanager"))); | ||
| 204 | actions = 0; | 207 | actions = 0; |
| 205 | } | 208 | } |
| 206 | if (flags & LKM_CANCEL) | 209 | if (flags & LKM_CANCEL) |
| @@ -364,7 +367,10 @@ static enum dlm_status dlm_send_remote_unlock_request(struct dlm_ctxt *dlm, | |||
| 364 | * updated state to the recovery master. this thread | 367 | * updated state to the recovery master. this thread |
| 365 | * just needs to finish out the operation and call | 368 | * just needs to finish out the operation and call |
| 366 | * the unlockast. */ | 369 | * the unlockast. */ |
| 367 | ret = DLM_NORMAL; | 370 | if (dlm_is_node_dead(dlm, owner)) |
| 371 | ret = DLM_NORMAL; | ||
| 372 | else | ||
| 373 | ret = DLM_NOLOCKMGR; | ||
| 368 | } else { | 374 | } else { |
| 369 | /* something bad. this will BUG in ocfs2 */ | 375 | /* something bad. this will BUG in ocfs2 */ |
| 370 | ret = dlm_err_to_dlm_status(tmpret); | 376 | ret = dlm_err_to_dlm_status(tmpret); |
| @@ -638,7 +644,9 @@ retry: | |||
| 638 | 644 | ||
| 639 | if (status == DLM_RECOVERING || | 645 | if (status == DLM_RECOVERING || |
| 640 | status == DLM_MIGRATING || | 646 | status == DLM_MIGRATING || |
| 641 | status == DLM_FORWARD) { | 647 | status == DLM_FORWARD || |
| 648 | status == DLM_NOLOCKMGR) { | ||
| 649 | |||
| 642 | /* We want to go away for a tiny bit to allow recovery | 650 | /* We want to go away for a tiny bit to allow recovery |
| 643 | * / migration to complete on this resource. I don't | 651 | * / migration to complete on this resource. I don't |
| 644 | * know of any wait queue we could sleep on as this | 652 | * know of any wait queue we could sleep on as this |
| @@ -650,7 +658,7 @@ retry: | |||
| 650 | msleep(50); | 658 | msleep(50); |
| 651 | 659 | ||
| 652 | mlog(0, "retrying unlock due to pending recovery/" | 660 | mlog(0, "retrying unlock due to pending recovery/" |
| 653 | "migration/in-progress\n"); | 661 | "migration/in-progress/reconnect\n"); |
| 654 | goto retry; | 662 | goto retry; |
| 655 | } | 663 | } |
| 656 | 664 | ||
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 8eb6e5732d3b..2930e231f3f9 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
| @@ -2233,16 +2233,13 @@ out: | |||
| 2233 | return ret; | 2233 | return ret; |
| 2234 | } | 2234 | } |
| 2235 | 2235 | ||
| 2236 | static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, | 2236 | static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, |
| 2237 | const struct iovec *iov, | 2237 | struct iov_iter *from) |
| 2238 | unsigned long nr_segs, | ||
| 2239 | loff_t pos) | ||
| 2240 | { | 2238 | { |
| 2241 | int ret, direct_io, appending, rw_level, have_alloc_sem = 0; | 2239 | int ret, direct_io, appending, rw_level, have_alloc_sem = 0; |
| 2242 | int can_do_direct, has_refcount = 0; | 2240 | int can_do_direct, has_refcount = 0; |
| 2243 | ssize_t written = 0; | 2241 | ssize_t written = 0; |
| 2244 | size_t ocount; /* original count */ | 2242 | size_t count = iov_iter_count(from); |
| 2245 | size_t count; /* after file limit checks */ | ||
| 2246 | loff_t old_size, *ppos = &iocb->ki_pos; | 2243 | loff_t old_size, *ppos = &iocb->ki_pos; |
| 2247 | u32 old_clusters; | 2244 | u32 old_clusters; |
| 2248 | struct file *file = iocb->ki_filp; | 2245 | struct file *file = iocb->ki_filp; |
| @@ -2256,7 +2253,7 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, | |||
| 2256 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | 2253 | (unsigned long long)OCFS2_I(inode)->ip_blkno, |
| 2257 | file->f_path.dentry->d_name.len, | 2254 | file->f_path.dentry->d_name.len, |
| 2258 | file->f_path.dentry->d_name.name, | 2255 | file->f_path.dentry->d_name.name, |
| 2259 | (unsigned int)nr_segs); | 2256 | (unsigned int)from->nr_segs); /* GRRRRR */ |
| 2260 | 2257 | ||
| 2261 | if (iocb->ki_nbytes == 0) | 2258 | if (iocb->ki_nbytes == 0) |
| 2262 | return 0; | 2259 | return 0; |
| @@ -2354,29 +2351,21 @@ relock: | |||
| 2354 | /* communicate with ocfs2_dio_end_io */ | 2351 | /* communicate with ocfs2_dio_end_io */ |
| 2355 | ocfs2_iocb_set_rw_locked(iocb, rw_level); | 2352 | ocfs2_iocb_set_rw_locked(iocb, rw_level); |
| 2356 | 2353 | ||
| 2357 | ret = generic_segment_checks(iov, &nr_segs, &ocount, | ||
| 2358 | VERIFY_READ); | ||
| 2359 | if (ret) | ||
| 2360 | goto out_dio; | ||
| 2361 | |||
| 2362 | count = ocount; | ||
| 2363 | ret = generic_write_checks(file, ppos, &count, | 2354 | ret = generic_write_checks(file, ppos, &count, |
| 2364 | S_ISBLK(inode->i_mode)); | 2355 | S_ISBLK(inode->i_mode)); |
| 2365 | if (ret) | 2356 | if (ret) |
| 2366 | goto out_dio; | 2357 | goto out_dio; |
| 2367 | 2358 | ||
| 2359 | iov_iter_truncate(from, count); | ||
| 2368 | if (direct_io) { | 2360 | if (direct_io) { |
| 2369 | written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos, | 2361 | written = generic_file_direct_write(iocb, from, *ppos); |
| 2370 | count, ocount); | ||
| 2371 | if (written < 0) { | 2362 | if (written < 0) { |
| 2372 | ret = written; | 2363 | ret = written; |
| 2373 | goto out_dio; | 2364 | goto out_dio; |
| 2374 | } | 2365 | } |
| 2375 | } else { | 2366 | } else { |
| 2376 | struct iov_iter from; | ||
| 2377 | iov_iter_init(&from, iov, nr_segs, count, 0); | ||
| 2378 | current->backing_dev_info = file->f_mapping->backing_dev_info; | 2367 | current->backing_dev_info = file->f_mapping->backing_dev_info; |
| 2379 | written = generic_perform_write(file, &from, *ppos); | 2368 | written = generic_perform_write(file, from, *ppos); |
| 2380 | if (likely(written >= 0)) | 2369 | if (likely(written >= 0)) |
| 2381 | iocb->ki_pos = *ppos + written; | 2370 | iocb->ki_pos = *ppos + written; |
| 2382 | current->backing_dev_info = NULL; | 2371 | current->backing_dev_info = NULL; |
| @@ -2441,84 +2430,6 @@ out_sems: | |||
| 2441 | return ret; | 2430 | return ret; |
| 2442 | } | 2431 | } |
| 2443 | 2432 | ||
| 2444 | static int ocfs2_splice_to_file(struct pipe_inode_info *pipe, | ||
| 2445 | struct file *out, | ||
| 2446 | struct splice_desc *sd) | ||
| 2447 | { | ||
| 2448 | int ret; | ||
| 2449 | |||
| 2450 | ret = ocfs2_prepare_inode_for_write(out, &sd->pos, | ||
| 2451 | sd->total_len, 0, NULL, NULL); | ||
| 2452 | if (ret < 0) { | ||
| 2453 | mlog_errno(ret); | ||
| 2454 | return ret; | ||
| 2455 | } | ||
| 2456 | |||
| 2457 | return splice_from_pipe_feed(pipe, sd, pipe_to_file); | ||
| 2458 | } | ||
| 2459 | |||
| 2460 | static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe, | ||
| 2461 | struct file *out, | ||
| 2462 | loff_t *ppos, | ||
| 2463 | size_t len, | ||
| 2464 | unsigned int flags) | ||
| 2465 | { | ||
| 2466 | int ret; | ||
| 2467 | struct address_space *mapping = out->f_mapping; | ||
| 2468 | struct inode *inode = mapping->host; | ||
| 2469 | struct splice_desc sd = { | ||
| 2470 | .total_len = len, | ||
| 2471 | .flags = flags, | ||
| 2472 | .pos = *ppos, | ||
| 2473 | .u.file = out, | ||
| 2474 | }; | ||
| 2475 | |||
| 2476 | |||
| 2477 | trace_ocfs2_file_splice_write(inode, out, out->f_path.dentry, | ||
| 2478 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | ||
| 2479 | out->f_path.dentry->d_name.len, | ||
| 2480 | out->f_path.dentry->d_name.name, len); | ||
| 2481 | |||
| 2482 | pipe_lock(pipe); | ||
| 2483 | |||
| 2484 | splice_from_pipe_begin(&sd); | ||
| 2485 | do { | ||
| 2486 | ret = splice_from_pipe_next(pipe, &sd); | ||
| 2487 | if (ret <= 0) | ||
| 2488 | break; | ||
| 2489 | |||
| 2490 | mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD); | ||
| 2491 | ret = ocfs2_rw_lock(inode, 1); | ||
| 2492 | if (ret < 0) | ||
| 2493 | mlog_errno(ret); | ||
| 2494 | else { | ||
| 2495 | ret = ocfs2_splice_to_file(pipe, out, &sd); | ||
| 2496 | ocfs2_rw_unlock(inode, 1); | ||
| 2497 | } | ||
| 2498 | mutex_unlock(&inode->i_mutex); | ||
| 2499 | } while (ret > 0); | ||
| 2500 | splice_from_pipe_end(pipe, &sd); | ||
| 2501 | |||
| 2502 | pipe_unlock(pipe); | ||
| 2503 | |||
| 2504 | if (sd.num_spliced) | ||
| 2505 | ret = sd.num_spliced; | ||
| 2506 | |||
| 2507 | if (ret > 0) { | ||
| 2508 | int err; | ||
| 2509 | |||
| 2510 | err = generic_write_sync(out, *ppos, ret); | ||
| 2511 | if (err) | ||
| 2512 | ret = err; | ||
| 2513 | else | ||
| 2514 | *ppos += ret; | ||
| 2515 | |||
| 2516 | balance_dirty_pages_ratelimited(mapping); | ||
| 2517 | } | ||
| 2518 | |||
| 2519 | return ret; | ||
| 2520 | } | ||
| 2521 | |||
| 2522 | static ssize_t ocfs2_file_splice_read(struct file *in, | 2433 | static ssize_t ocfs2_file_splice_read(struct file *in, |
| 2523 | loff_t *ppos, | 2434 | loff_t *ppos, |
| 2524 | struct pipe_inode_info *pipe, | 2435 | struct pipe_inode_info *pipe, |
| @@ -2534,7 +2445,7 @@ static ssize_t ocfs2_file_splice_read(struct file *in, | |||
| 2534 | in->f_path.dentry->d_name.name, len); | 2445 | in->f_path.dentry->d_name.name, len); |
| 2535 | 2446 | ||
| 2536 | /* | 2447 | /* |
| 2537 | * See the comment in ocfs2_file_aio_read() | 2448 | * See the comment in ocfs2_file_read_iter() |
| 2538 | */ | 2449 | */ |
| 2539 | ret = ocfs2_inode_lock_atime(inode, in->f_path.mnt, &lock_level); | 2450 | ret = ocfs2_inode_lock_atime(inode, in->f_path.mnt, &lock_level); |
| 2540 | if (ret < 0) { | 2451 | if (ret < 0) { |
| @@ -2549,10 +2460,8 @@ bail: | |||
| 2549 | return ret; | 2460 | return ret; |
| 2550 | } | 2461 | } |
| 2551 | 2462 | ||
| 2552 | static ssize_t ocfs2_file_aio_read(struct kiocb *iocb, | 2463 | static ssize_t ocfs2_file_read_iter(struct kiocb *iocb, |
| 2553 | const struct iovec *iov, | 2464 | struct iov_iter *to) |
| 2554 | unsigned long nr_segs, | ||
| 2555 | loff_t pos) | ||
| 2556 | { | 2465 | { |
| 2557 | int ret = 0, rw_level = -1, have_alloc_sem = 0, lock_level = 0; | 2466 | int ret = 0, rw_level = -1, have_alloc_sem = 0, lock_level = 0; |
| 2558 | struct file *filp = iocb->ki_filp; | 2467 | struct file *filp = iocb->ki_filp; |
| @@ -2561,7 +2470,8 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb, | |||
| 2561 | trace_ocfs2_file_aio_read(inode, filp, filp->f_path.dentry, | 2470 | trace_ocfs2_file_aio_read(inode, filp, filp->f_path.dentry, |
| 2562 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | 2471 | (unsigned long long)OCFS2_I(inode)->ip_blkno, |
| 2563 | filp->f_path.dentry->d_name.len, | 2472 | filp->f_path.dentry->d_name.len, |
| 2564 | filp->f_path.dentry->d_name.name, nr_segs); | 2473 | filp->f_path.dentry->d_name.name, |
| 2474 | to->nr_segs); /* GRRRRR */ | ||
| 2565 | 2475 | ||
| 2566 | 2476 | ||
| 2567 | if (!inode) { | 2477 | if (!inode) { |
| @@ -2606,13 +2516,13 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb, | |||
| 2606 | } | 2516 | } |
| 2607 | ocfs2_inode_unlock(inode, lock_level); | 2517 | ocfs2_inode_unlock(inode, lock_level); |
| 2608 | 2518 | ||
| 2609 | ret = generic_file_aio_read(iocb, iov, nr_segs, iocb->ki_pos); | 2519 | ret = generic_file_read_iter(iocb, to); |
| 2610 | trace_generic_file_aio_read_ret(ret); | 2520 | trace_generic_file_aio_read_ret(ret); |
| 2611 | 2521 | ||
| 2612 | /* buffered aio wouldn't have proper lock coverage today */ | 2522 | /* buffered aio wouldn't have proper lock coverage today */ |
| 2613 | BUG_ON(ret == -EIOCBQUEUED && !(filp->f_flags & O_DIRECT)); | 2523 | BUG_ON(ret == -EIOCBQUEUED && !(filp->f_flags & O_DIRECT)); |
| 2614 | 2524 | ||
| 2615 | /* see ocfs2_file_aio_write */ | 2525 | /* see ocfs2_file_write_iter */ |
| 2616 | if (ret == -EIOCBQUEUED || !ocfs2_iocb_is_rw_locked(iocb)) { | 2526 | if (ret == -EIOCBQUEUED || !ocfs2_iocb_is_rw_locked(iocb)) { |
| 2617 | rw_level = -1; | 2527 | rw_level = -1; |
| 2618 | have_alloc_sem = 0; | 2528 | have_alloc_sem = 0; |
| @@ -2705,14 +2615,14 @@ const struct inode_operations ocfs2_special_file_iops = { | |||
| 2705 | */ | 2615 | */ |
| 2706 | const struct file_operations ocfs2_fops = { | 2616 | const struct file_operations ocfs2_fops = { |
| 2707 | .llseek = ocfs2_file_llseek, | 2617 | .llseek = ocfs2_file_llseek, |
| 2708 | .read = do_sync_read, | 2618 | .read = new_sync_read, |
| 2709 | .write = do_sync_write, | 2619 | .write = new_sync_write, |
| 2710 | .mmap = ocfs2_mmap, | 2620 | .mmap = ocfs2_mmap, |
| 2711 | .fsync = ocfs2_sync_file, | 2621 | .fsync = ocfs2_sync_file, |
| 2712 | .release = ocfs2_file_release, | 2622 | .release = ocfs2_file_release, |
| 2713 | .open = ocfs2_file_open, | 2623 | .open = ocfs2_file_open, |
| 2714 | .aio_read = ocfs2_file_aio_read, | 2624 | .read_iter = ocfs2_file_read_iter, |
| 2715 | .aio_write = ocfs2_file_aio_write, | 2625 | .write_iter = ocfs2_file_write_iter, |
| 2716 | .unlocked_ioctl = ocfs2_ioctl, | 2626 | .unlocked_ioctl = ocfs2_ioctl, |
| 2717 | #ifdef CONFIG_COMPAT | 2627 | #ifdef CONFIG_COMPAT |
| 2718 | .compat_ioctl = ocfs2_compat_ioctl, | 2628 | .compat_ioctl = ocfs2_compat_ioctl, |
| @@ -2720,7 +2630,7 @@ const struct file_operations ocfs2_fops = { | |||
| 2720 | .lock = ocfs2_lock, | 2630 | .lock = ocfs2_lock, |
| 2721 | .flock = ocfs2_flock, | 2631 | .flock = ocfs2_flock, |
| 2722 | .splice_read = ocfs2_file_splice_read, | 2632 | .splice_read = ocfs2_file_splice_read, |
| 2723 | .splice_write = ocfs2_file_splice_write, | 2633 | .splice_write = iter_file_splice_write, |
| 2724 | .fallocate = ocfs2_fallocate, | 2634 | .fallocate = ocfs2_fallocate, |
| 2725 | }; | 2635 | }; |
| 2726 | 2636 | ||
| @@ -2753,21 +2663,21 @@ const struct file_operations ocfs2_dops = { | |||
| 2753 | */ | 2663 | */ |
| 2754 | const struct file_operations ocfs2_fops_no_plocks = { | 2664 | const struct file_operations ocfs2_fops_no_plocks = { |
| 2755 | .llseek = ocfs2_file_llseek, | 2665 | .llseek = ocfs2_file_llseek, |
| 2756 | .read = do_sync_read, | 2666 | .read = new_sync_read, |
| 2757 | .write = do_sync_write, | 2667 | .write = new_sync_write, |
| 2758 | .mmap = ocfs2_mmap, | 2668 | .mmap = ocfs2_mmap, |
| 2759 | .fsync = ocfs2_sync_file, | 2669 | .fsync = ocfs2_sync_file, |
| 2760 | .release = ocfs2_file_release, | 2670 | .release = ocfs2_file_release, |
| 2761 | .open = ocfs2_file_open, | 2671 | .open = ocfs2_file_open, |
| 2762 | .aio_read = ocfs2_file_aio_read, | 2672 | .read_iter = ocfs2_file_read_iter, |
| 2763 | .aio_write = ocfs2_file_aio_write, | 2673 | .write_iter = ocfs2_file_write_iter, |
| 2764 | .unlocked_ioctl = ocfs2_ioctl, | 2674 | .unlocked_ioctl = ocfs2_ioctl, |
| 2765 | #ifdef CONFIG_COMPAT | 2675 | #ifdef CONFIG_COMPAT |
| 2766 | .compat_ioctl = ocfs2_compat_ioctl, | 2676 | .compat_ioctl = ocfs2_compat_ioctl, |
| 2767 | #endif | 2677 | #endif |
| 2768 | .flock = ocfs2_flock, | 2678 | .flock = ocfs2_flock, |
| 2769 | .splice_read = ocfs2_file_splice_read, | 2679 | .splice_read = ocfs2_file_splice_read, |
| 2770 | .splice_write = ocfs2_file_splice_write, | 2680 | .splice_write = iter_file_splice_write, |
| 2771 | .fallocate = ocfs2_fallocate, | 2681 | .fallocate = ocfs2_fallocate, |
| 2772 | }; | 2682 | }; |
| 2773 | 2683 | ||
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 2060fc398445..8add6f1030d7 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c | |||
| @@ -205,6 +205,21 @@ static struct inode *ocfs2_get_init_inode(struct inode *dir, umode_t mode) | |||
| 205 | return inode; | 205 | return inode; |
| 206 | } | 206 | } |
| 207 | 207 | ||
| 208 | static void ocfs2_cleanup_add_entry_failure(struct ocfs2_super *osb, | ||
| 209 | struct dentry *dentry, struct inode *inode) | ||
| 210 | { | ||
| 211 | struct ocfs2_dentry_lock *dl = dentry->d_fsdata; | ||
| 212 | |||
| 213 | ocfs2_simple_drop_lockres(osb, &dl->dl_lockres); | ||
| 214 | ocfs2_lock_res_free(&dl->dl_lockres); | ||
| 215 | BUG_ON(dl->dl_count != 1); | ||
| 216 | spin_lock(&dentry_attach_lock); | ||
| 217 | dentry->d_fsdata = NULL; | ||
| 218 | spin_unlock(&dentry_attach_lock); | ||
| 219 | kfree(dl); | ||
| 220 | iput(inode); | ||
| 221 | } | ||
| 222 | |||
| 208 | static int ocfs2_mknod(struct inode *dir, | 223 | static int ocfs2_mknod(struct inode *dir, |
| 209 | struct dentry *dentry, | 224 | struct dentry *dentry, |
| 210 | umode_t mode, | 225 | umode_t mode, |
| @@ -231,6 +246,7 @@ static int ocfs2_mknod(struct inode *dir, | |||
| 231 | sigset_t oldset; | 246 | sigset_t oldset; |
| 232 | int did_block_signals = 0; | 247 | int did_block_signals = 0; |
| 233 | struct posix_acl *default_acl = NULL, *acl = NULL; | 248 | struct posix_acl *default_acl = NULL, *acl = NULL; |
| 249 | struct ocfs2_dentry_lock *dl = NULL; | ||
| 234 | 250 | ||
| 235 | trace_ocfs2_mknod(dir, dentry, dentry->d_name.len, dentry->d_name.name, | 251 | trace_ocfs2_mknod(dir, dentry, dentry->d_name.len, dentry->d_name.name, |
| 236 | (unsigned long long)OCFS2_I(dir)->ip_blkno, | 252 | (unsigned long long)OCFS2_I(dir)->ip_blkno, |
| @@ -423,6 +439,8 @@ static int ocfs2_mknod(struct inode *dir, | |||
| 423 | goto leave; | 439 | goto leave; |
| 424 | } | 440 | } |
| 425 | 441 | ||
| 442 | dl = dentry->d_fsdata; | ||
| 443 | |||
| 426 | status = ocfs2_add_entry(handle, dentry, inode, | 444 | status = ocfs2_add_entry(handle, dentry, inode, |
| 427 | OCFS2_I(inode)->ip_blkno, parent_fe_bh, | 445 | OCFS2_I(inode)->ip_blkno, parent_fe_bh, |
| 428 | &lookup); | 446 | &lookup); |
| @@ -469,6 +487,9 @@ leave: | |||
| 469 | * ocfs2_delete_inode will mutex_lock again. | 487 | * ocfs2_delete_inode will mutex_lock again. |
| 470 | */ | 488 | */ |
| 471 | if ((status < 0) && inode) { | 489 | if ((status < 0) && inode) { |
| 490 | if (dl) | ||
| 491 | ocfs2_cleanup_add_entry_failure(osb, dentry, inode); | ||
| 492 | |||
| 472 | OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SKIP_ORPHAN_DIR; | 493 | OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SKIP_ORPHAN_DIR; |
| 473 | clear_nlink(inode); | 494 | clear_nlink(inode); |
| 474 | iput(inode); | 495 | iput(inode); |
| @@ -991,6 +1012,65 @@ leave: | |||
| 991 | return status; | 1012 | return status; |
| 992 | } | 1013 | } |
| 993 | 1014 | ||
| 1015 | static int ocfs2_check_if_ancestor(struct ocfs2_super *osb, | ||
| 1016 | u64 src_inode_no, u64 dest_inode_no) | ||
| 1017 | { | ||
| 1018 | int ret = 0, i = 0; | ||
| 1019 | u64 parent_inode_no = 0; | ||
| 1020 | u64 child_inode_no = src_inode_no; | ||
| 1021 | struct inode *child_inode; | ||
| 1022 | |||
| 1023 | #define MAX_LOOKUP_TIMES 32 | ||
| 1024 | while (1) { | ||
| 1025 | child_inode = ocfs2_iget(osb, child_inode_no, 0, 0); | ||
| 1026 | if (IS_ERR(child_inode)) { | ||
| 1027 | ret = PTR_ERR(child_inode); | ||
| 1028 | break; | ||
| 1029 | } | ||
| 1030 | |||
| 1031 | ret = ocfs2_inode_lock(child_inode, NULL, 0); | ||
| 1032 | if (ret < 0) { | ||
| 1033 | iput(child_inode); | ||
| 1034 | if (ret != -ENOENT) | ||
| 1035 | mlog_errno(ret); | ||
| 1036 | break; | ||
| 1037 | } | ||
| 1038 | |||
| 1039 | ret = ocfs2_lookup_ino_from_name(child_inode, "..", 2, | ||
| 1040 | &parent_inode_no); | ||
| 1041 | ocfs2_inode_unlock(child_inode, 0); | ||
| 1042 | iput(child_inode); | ||
| 1043 | if (ret < 0) { | ||
| 1044 | ret = -ENOENT; | ||
| 1045 | break; | ||
| 1046 | } | ||
| 1047 | |||
| 1048 | if (parent_inode_no == dest_inode_no) { | ||
| 1049 | ret = 1; | ||
| 1050 | break; | ||
| 1051 | } | ||
| 1052 | |||
| 1053 | if (parent_inode_no == osb->root_inode->i_ino) { | ||
| 1054 | ret = 0; | ||
| 1055 | break; | ||
| 1056 | } | ||
| 1057 | |||
| 1058 | child_inode_no = parent_inode_no; | ||
| 1059 | |||
| 1060 | if (++i >= MAX_LOOKUP_TIMES) { | ||
| 1061 | mlog(ML_NOTICE, "max lookup times reached, filesystem " | ||
| 1062 | "may have nested directories, " | ||
| 1063 | "src inode: %llu, dest inode: %llu.\n", | ||
| 1064 | (unsigned long long)src_inode_no, | ||
| 1065 | (unsigned long long)dest_inode_no); | ||
| 1066 | ret = 0; | ||
| 1067 | break; | ||
| 1068 | } | ||
| 1069 | } | ||
| 1070 | |||
| 1071 | return ret; | ||
| 1072 | } | ||
| 1073 | |||
| 994 | /* | 1074 | /* |
| 995 | * The only place this should be used is rename! | 1075 | * The only place this should be used is rename! |
| 996 | * if they have the same id, then the 1st one is the only one locked. | 1076 | * if they have the same id, then the 1st one is the only one locked. |
| @@ -1002,6 +1082,7 @@ static int ocfs2_double_lock(struct ocfs2_super *osb, | |||
| 1002 | struct inode *inode2) | 1082 | struct inode *inode2) |
| 1003 | { | 1083 | { |
| 1004 | int status; | 1084 | int status; |
| 1085 | int inode1_is_ancestor, inode2_is_ancestor; | ||
| 1005 | struct ocfs2_inode_info *oi1 = OCFS2_I(inode1); | 1086 | struct ocfs2_inode_info *oi1 = OCFS2_I(inode1); |
| 1006 | struct ocfs2_inode_info *oi2 = OCFS2_I(inode2); | 1087 | struct ocfs2_inode_info *oi2 = OCFS2_I(inode2); |
| 1007 | struct buffer_head **tmpbh; | 1088 | struct buffer_head **tmpbh; |
| @@ -1015,9 +1096,26 @@ static int ocfs2_double_lock(struct ocfs2_super *osb, | |||
| 1015 | if (*bh2) | 1096 | if (*bh2) |
| 1016 | *bh2 = NULL; | 1097 | *bh2 = NULL; |
| 1017 | 1098 | ||
| 1018 | /* we always want to lock the one with the lower lockid first. */ | 1099 | /* we always want to lock the one with the lower lockid first. |
| 1100 | * and if they are nested, we lock ancestor first */ | ||
| 1019 | if (oi1->ip_blkno != oi2->ip_blkno) { | 1101 | if (oi1->ip_blkno != oi2->ip_blkno) { |
| 1020 | if (oi1->ip_blkno < oi2->ip_blkno) { | 1102 | inode1_is_ancestor = ocfs2_check_if_ancestor(osb, oi2->ip_blkno, |
| 1103 | oi1->ip_blkno); | ||
| 1104 | if (inode1_is_ancestor < 0) { | ||
| 1105 | status = inode1_is_ancestor; | ||
| 1106 | goto bail; | ||
| 1107 | } | ||
| 1108 | |||
| 1109 | inode2_is_ancestor = ocfs2_check_if_ancestor(osb, oi1->ip_blkno, | ||
| 1110 | oi2->ip_blkno); | ||
| 1111 | if (inode2_is_ancestor < 0) { | ||
| 1112 | status = inode2_is_ancestor; | ||
| 1113 | goto bail; | ||
| 1114 | } | ||
| 1115 | |||
| 1116 | if ((inode1_is_ancestor == 1) || | ||
| 1117 | (oi1->ip_blkno < oi2->ip_blkno && | ||
| 1118 | inode2_is_ancestor == 0)) { | ||
| 1021 | /* switch id1 and id2 around */ | 1119 | /* switch id1 and id2 around */ |
| 1022 | tmpbh = bh2; | 1120 | tmpbh = bh2; |
| 1023 | bh2 = bh1; | 1121 | bh2 = bh1; |
| @@ -1098,6 +1196,7 @@ static int ocfs2_rename(struct inode *old_dir, | |||
| 1098 | struct ocfs2_dir_lookup_result old_entry_lookup = { NULL, }; | 1196 | struct ocfs2_dir_lookup_result old_entry_lookup = { NULL, }; |
| 1099 | struct ocfs2_dir_lookup_result orphan_insert = { NULL, }; | 1197 | struct ocfs2_dir_lookup_result orphan_insert = { NULL, }; |
| 1100 | struct ocfs2_dir_lookup_result target_insert = { NULL, }; | 1198 | struct ocfs2_dir_lookup_result target_insert = { NULL, }; |
| 1199 | bool should_add_orphan = false; | ||
| 1101 | 1200 | ||
| 1102 | /* At some point it might be nice to break this function up a | 1201 | /* At some point it might be nice to break this function up a |
| 1103 | * bit. */ | 1202 | * bit. */ |
| @@ -1134,6 +1233,21 @@ static int ocfs2_rename(struct inode *old_dir, | |||
| 1134 | goto bail; | 1233 | goto bail; |
| 1135 | } | 1234 | } |
| 1136 | rename_lock = 1; | 1235 | rename_lock = 1; |
| 1236 | |||
| 1237 | /* here we cannot guarantee the inodes haven't just been | ||
| 1238 | * changed, so check if they are nested again */ | ||
| 1239 | status = ocfs2_check_if_ancestor(osb, new_dir->i_ino, | ||
| 1240 | old_inode->i_ino); | ||
| 1241 | if (status < 0) { | ||
| 1242 | mlog_errno(status); | ||
| 1243 | goto bail; | ||
| 1244 | } else if (status == 1) { | ||
| 1245 | status = -EPERM; | ||
| 1246 | trace_ocfs2_rename_not_permitted( | ||
| 1247 | (unsigned long long)old_inode->i_ino, | ||
| 1248 | (unsigned long long)new_dir->i_ino); | ||
| 1249 | goto bail; | ||
| 1250 | } | ||
| 1137 | } | 1251 | } |
| 1138 | 1252 | ||
| 1139 | /* if old and new are the same, this'll just do one lock. */ | 1253 | /* if old and new are the same, this'll just do one lock. */ |
| @@ -1304,6 +1418,7 @@ static int ocfs2_rename(struct inode *old_dir, | |||
| 1304 | mlog_errno(status); | 1418 | mlog_errno(status); |
| 1305 | goto bail; | 1419 | goto bail; |
| 1306 | } | 1420 | } |
| 1421 | should_add_orphan = true; | ||
| 1307 | } | 1422 | } |
| 1308 | } else { | 1423 | } else { |
| 1309 | BUG_ON(new_dentry->d_parent->d_inode != new_dir); | 1424 | BUG_ON(new_dentry->d_parent->d_inode != new_dir); |
| @@ -1348,17 +1463,6 @@ static int ocfs2_rename(struct inode *old_dir, | |||
| 1348 | goto bail; | 1463 | goto bail; |
| 1349 | } | 1464 | } |
| 1350 | 1465 | ||
| 1351 | if (S_ISDIR(new_inode->i_mode) || | ||
| 1352 | (ocfs2_read_links_count(newfe) == 1)) { | ||
| 1353 | status = ocfs2_orphan_add(osb, handle, new_inode, | ||
| 1354 | newfe_bh, orphan_name, | ||
| 1355 | &orphan_insert, orphan_dir); | ||
| 1356 | if (status < 0) { | ||
| 1357 | mlog_errno(status); | ||
| 1358 | goto bail; | ||
| 1359 | } | ||
| 1360 | } | ||
| 1361 | |||
| 1362 | /* change the dirent to point to the correct inode */ | 1466 | /* change the dirent to point to the correct inode */ |
| 1363 | status = ocfs2_update_entry(new_dir, handle, &target_lookup_res, | 1467 | status = ocfs2_update_entry(new_dir, handle, &target_lookup_res, |
| 1364 | old_inode); | 1468 | old_inode); |
| @@ -1373,6 +1477,15 @@ static int ocfs2_rename(struct inode *old_dir, | |||
| 1373 | else | 1477 | else |
| 1374 | ocfs2_add_links_count(newfe, -1); | 1478 | ocfs2_add_links_count(newfe, -1); |
| 1375 | ocfs2_journal_dirty(handle, newfe_bh); | 1479 | ocfs2_journal_dirty(handle, newfe_bh); |
| 1480 | if (should_add_orphan) { | ||
| 1481 | status = ocfs2_orphan_add(osb, handle, new_inode, | ||
| 1482 | newfe_bh, orphan_name, | ||
| 1483 | &orphan_insert, orphan_dir); | ||
| 1484 | if (status < 0) { | ||
| 1485 | mlog_errno(status); | ||
| 1486 | goto bail; | ||
| 1487 | } | ||
| 1488 | } | ||
| 1376 | } else { | 1489 | } else { |
| 1377 | /* if the name was not found in new_dir, add it now */ | 1490 | /* if the name was not found in new_dir, add it now */ |
| 1378 | status = ocfs2_add_entry(handle, new_dentry, old_inode, | 1491 | status = ocfs2_add_entry(handle, new_dentry, old_inode, |
| @@ -1642,6 +1755,7 @@ static int ocfs2_symlink(struct inode *dir, | |||
| 1642 | struct ocfs2_dir_lookup_result lookup = { NULL, }; | 1755 | struct ocfs2_dir_lookup_result lookup = { NULL, }; |
| 1643 | sigset_t oldset; | 1756 | sigset_t oldset; |
| 1644 | int did_block_signals = 0; | 1757 | int did_block_signals = 0; |
| 1758 | struct ocfs2_dentry_lock *dl = NULL; | ||
| 1645 | 1759 | ||
| 1646 | trace_ocfs2_symlink_begin(dir, dentry, symname, | 1760 | trace_ocfs2_symlink_begin(dir, dentry, symname, |
| 1647 | dentry->d_name.len, dentry->d_name.name); | 1761 | dentry->d_name.len, dentry->d_name.name); |
| @@ -1830,6 +1944,8 @@ static int ocfs2_symlink(struct inode *dir, | |||
| 1830 | goto bail; | 1944 | goto bail; |
| 1831 | } | 1945 | } |
| 1832 | 1946 | ||
| 1947 | dl = dentry->d_fsdata; | ||
| 1948 | |||
| 1833 | status = ocfs2_add_entry(handle, dentry, inode, | 1949 | status = ocfs2_add_entry(handle, dentry, inode, |
| 1834 | le64_to_cpu(fe->i_blkno), parent_fe_bh, | 1950 | le64_to_cpu(fe->i_blkno), parent_fe_bh, |
| 1835 | &lookup); | 1951 | &lookup); |
| @@ -1864,6 +1980,9 @@ bail: | |||
| 1864 | if (xattr_ac) | 1980 | if (xattr_ac) |
| 1865 | ocfs2_free_alloc_context(xattr_ac); | 1981 | ocfs2_free_alloc_context(xattr_ac); |
| 1866 | if ((status < 0) && inode) { | 1982 | if ((status < 0) && inode) { |
| 1983 | if (dl) | ||
| 1984 | ocfs2_cleanup_add_entry_failure(osb, dentry, inode); | ||
| 1985 | |||
| 1867 | OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SKIP_ORPHAN_DIR; | 1986 | OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SKIP_ORPHAN_DIR; |
| 1868 | clear_nlink(inode); | 1987 | clear_nlink(inode); |
| 1869 | iput(inode); | 1988 | iput(inode); |
diff --git a/fs/ocfs2/ocfs2_trace.h b/fs/ocfs2/ocfs2_trace.h index 1b60c62aa9d6..6cb019b7c6a8 100644 --- a/fs/ocfs2/ocfs2_trace.h +++ b/fs/ocfs2/ocfs2_trace.h | |||
| @@ -2292,6 +2292,8 @@ TRACE_EVENT(ocfs2_rename, | |||
| 2292 | __entry->new_len, __get_str(new_name)) | 2292 | __entry->new_len, __get_str(new_name)) |
| 2293 | ); | 2293 | ); |
| 2294 | 2294 | ||
| 2295 | DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_rename_not_permitted); | ||
| 2296 | |||
| 2295 | TRACE_EVENT(ocfs2_rename_target_exists, | 2297 | TRACE_EVENT(ocfs2_rename_target_exists, |
| 2296 | TP_PROTO(int new_len, const char *new_name), | 2298 | TP_PROTO(int new_len, const char *new_name), |
| 2297 | TP_ARGS(new_len, new_name), | 2299 | TP_ARGS(new_len, new_name), |
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 714e53b9cc66..636aab69ead5 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c | |||
| @@ -4288,9 +4288,16 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir, | |||
| 4288 | goto out; | 4288 | goto out; |
| 4289 | } | 4289 | } |
| 4290 | 4290 | ||
| 4291 | error = ocfs2_rw_lock(inode, 1); | ||
| 4292 | if (error) { | ||
| 4293 | mlog_errno(error); | ||
| 4294 | goto out; | ||
| 4295 | } | ||
| 4296 | |||
| 4291 | error = ocfs2_inode_lock(inode, &old_bh, 1); | 4297 | error = ocfs2_inode_lock(inode, &old_bh, 1); |
| 4292 | if (error) { | 4298 | if (error) { |
| 4293 | mlog_errno(error); | 4299 | mlog_errno(error); |
| 4300 | ocfs2_rw_unlock(inode, 1); | ||
| 4294 | goto out; | 4301 | goto out; |
| 4295 | } | 4302 | } |
| 4296 | 4303 | ||
| @@ -4302,6 +4309,7 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir, | |||
| 4302 | up_write(&OCFS2_I(inode)->ip_xattr_sem); | 4309 | up_write(&OCFS2_I(inode)->ip_xattr_sem); |
| 4303 | 4310 | ||
| 4304 | ocfs2_inode_unlock(inode, 1); | 4311 | ocfs2_inode_unlock(inode, 1); |
| 4312 | ocfs2_rw_unlock(inode, 1); | ||
| 4305 | brelse(old_bh); | 4313 | brelse(old_bh); |
| 4306 | 4314 | ||
| 4307 | if (error) { | 4315 | if (error) { |
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index c7a89cea5c5d..ddb662b32447 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
| @@ -1925,15 +1925,11 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) | |||
| 1925 | 1925 | ||
| 1926 | ocfs2_shutdown_local_alloc(osb); | 1926 | ocfs2_shutdown_local_alloc(osb); |
| 1927 | 1927 | ||
| 1928 | ocfs2_truncate_log_shutdown(osb); | ||
| 1929 | |||
| 1928 | /* This will disable recovery and flush any recovery work. */ | 1930 | /* This will disable recovery and flush any recovery work. */ |
| 1929 | ocfs2_recovery_exit(osb); | 1931 | ocfs2_recovery_exit(osb); |
| 1930 | 1932 | ||
| 1931 | /* | ||
| 1932 | * During dismount, when it recovers another node it will call | ||
| 1933 | * ocfs2_recover_orphans and queue delayed work osb_truncate_log_wq. | ||
| 1934 | */ | ||
| 1935 | ocfs2_truncate_log_shutdown(osb); | ||
| 1936 | |||
| 1937 | ocfs2_journal_shutdown(osb); | 1933 | ocfs2_journal_shutdown(osb); |
| 1938 | 1934 | ||
| 1939 | ocfs2_sync_blockdev(sb); | 1935 | ocfs2_sync_blockdev(sb); |
diff --git a/fs/omfs/file.c b/fs/omfs/file.c index 54d57d6ba68d..902e88527fce 100644 --- a/fs/omfs/file.c +++ b/fs/omfs/file.c | |||
| @@ -337,10 +337,10 @@ static sector_t omfs_bmap(struct address_space *mapping, sector_t block) | |||
| 337 | 337 | ||
| 338 | const struct file_operations omfs_file_operations = { | 338 | const struct file_operations omfs_file_operations = { |
| 339 | .llseek = generic_file_llseek, | 339 | .llseek = generic_file_llseek, |
| 340 | .read = do_sync_read, | 340 | .read = new_sync_read, |
| 341 | .write = do_sync_write, | 341 | .write = new_sync_write, |
| 342 | .aio_read = generic_file_aio_read, | 342 | .read_iter = generic_file_read_iter, |
| 343 | .aio_write = generic_file_aio_write, | 343 | .write_iter = generic_file_write_iter, |
| 344 | .mmap = generic_file_mmap, | 344 | .mmap = generic_file_mmap, |
| 345 | .fsync = generic_file_fsync, | 345 | .fsync = generic_file_fsync, |
| 346 | .splice_read = generic_file_splice_read, | 346 | .splice_read = generic_file_splice_read, |
| @@ -725,6 +725,12 @@ static int do_dentry_open(struct file *f, | |||
| 725 | } | 725 | } |
| 726 | if ((f->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) | 726 | if ((f->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) |
| 727 | i_readcount_inc(inode); | 727 | i_readcount_inc(inode); |
| 728 | if ((f->f_mode & FMODE_READ) && | ||
| 729 | likely(f->f_op->read || f->f_op->aio_read || f->f_op->read_iter)) | ||
| 730 | f->f_mode |= FMODE_CAN_READ; | ||
| 731 | if ((f->f_mode & FMODE_WRITE) && | ||
| 732 | likely(f->f_op->write || f->f_op->aio_write || f->f_op->write_iter)) | ||
| 733 | f->f_mode |= FMODE_CAN_WRITE; | ||
| 728 | 734 | ||
| 729 | f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); | 735 | f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); |
| 730 | 736 | ||
| @@ -116,50 +116,6 @@ void pipe_wait(struct pipe_inode_info *pipe) | |||
| 116 | pipe_lock(pipe); | 116 | pipe_lock(pipe); |
| 117 | } | 117 | } |
| 118 | 118 | ||
| 119 | static int | ||
| 120 | pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len, | ||
| 121 | int atomic) | ||
| 122 | { | ||
| 123 | unsigned long copy; | ||
| 124 | |||
| 125 | while (len > 0) { | ||
| 126 | while (!iov->iov_len) | ||
| 127 | iov++; | ||
| 128 | copy = min_t(unsigned long, len, iov->iov_len); | ||
| 129 | |||
| 130 | if (atomic) { | ||
| 131 | if (__copy_from_user_inatomic(to, iov->iov_base, copy)) | ||
| 132 | return -EFAULT; | ||
| 133 | } else { | ||
| 134 | if (copy_from_user(to, iov->iov_base, copy)) | ||
| 135 | return -EFAULT; | ||
| 136 | } | ||
| 137 | to += copy; | ||
| 138 | len -= copy; | ||
| 139 | iov->iov_base += copy; | ||
| 140 | iov->iov_len -= copy; | ||
| 141 | } | ||
| 142 | return 0; | ||
| 143 | } | ||
| 144 | |||
| 145 | /* | ||
| 146 | * Pre-fault in the user memory, so we can use atomic copies. | ||
| 147 | */ | ||
| 148 | static void iov_fault_in_pages_read(struct iovec *iov, unsigned long len) | ||
| 149 | { | ||
| 150 | while (!iov->iov_len) | ||
| 151 | iov++; | ||
| 152 | |||
| 153 | while (len > 0) { | ||
| 154 | unsigned long this_len; | ||
| 155 | |||
| 156 | this_len = min_t(unsigned long, len, iov->iov_len); | ||
| 157 | fault_in_pages_readable(iov->iov_base, this_len); | ||
| 158 | len -= this_len; | ||
| 159 | iov++; | ||
| 160 | } | ||
| 161 | } | ||
| 162 | |||
| 163 | static void anon_pipe_buf_release(struct pipe_inode_info *pipe, | 119 | static void anon_pipe_buf_release(struct pipe_inode_info *pipe, |
| 164 | struct pipe_buffer *buf) | 120 | struct pipe_buffer *buf) |
| 165 | { | 121 | { |
| @@ -271,24 +227,18 @@ static const struct pipe_buf_operations packet_pipe_buf_ops = { | |||
| 271 | }; | 227 | }; |
| 272 | 228 | ||
| 273 | static ssize_t | 229 | static ssize_t |
| 274 | pipe_read(struct kiocb *iocb, const struct iovec *_iov, | 230 | pipe_read(struct kiocb *iocb, struct iov_iter *to) |
| 275 | unsigned long nr_segs, loff_t pos) | ||
| 276 | { | 231 | { |
| 232 | size_t total_len = iov_iter_count(to); | ||
| 277 | struct file *filp = iocb->ki_filp; | 233 | struct file *filp = iocb->ki_filp; |
| 278 | struct pipe_inode_info *pipe = filp->private_data; | 234 | struct pipe_inode_info *pipe = filp->private_data; |
| 279 | int do_wakeup; | 235 | int do_wakeup; |
| 280 | ssize_t ret; | 236 | ssize_t ret; |
| 281 | struct iovec *iov = (struct iovec *)_iov; | ||
| 282 | size_t total_len; | ||
| 283 | struct iov_iter iter; | ||
| 284 | 237 | ||
| 285 | total_len = iov_length(iov, nr_segs); | ||
| 286 | /* Null read succeeds. */ | 238 | /* Null read succeeds. */ |
| 287 | if (unlikely(total_len == 0)) | 239 | if (unlikely(total_len == 0)) |
| 288 | return 0; | 240 | return 0; |
| 289 | 241 | ||
| 290 | iov_iter_init(&iter, iov, nr_segs, total_len, 0); | ||
| 291 | |||
| 292 | do_wakeup = 0; | 242 | do_wakeup = 0; |
| 293 | ret = 0; | 243 | ret = 0; |
| 294 | __pipe_lock(pipe); | 244 | __pipe_lock(pipe); |
| @@ -312,7 +262,7 @@ pipe_read(struct kiocb *iocb, const struct iovec *_iov, | |||
| 312 | break; | 262 | break; |
| 313 | } | 263 | } |
| 314 | 264 | ||
| 315 | written = copy_page_to_iter(buf->page, buf->offset, chars, &iter); | 265 | written = copy_page_to_iter(buf->page, buf->offset, chars, to); |
| 316 | if (unlikely(written < chars)) { | 266 | if (unlikely(written < chars)) { |
| 317 | if (!ret) | 267 | if (!ret) |
| 318 | ret = -EFAULT; | 268 | ret = -EFAULT; |
| @@ -386,24 +336,19 @@ static inline int is_packetized(struct file *file) | |||
| 386 | } | 336 | } |
| 387 | 337 | ||
| 388 | static ssize_t | 338 | static ssize_t |
| 389 | pipe_write(struct kiocb *iocb, const struct iovec *_iov, | 339 | pipe_write(struct kiocb *iocb, struct iov_iter *from) |
| 390 | unsigned long nr_segs, loff_t ppos) | ||
| 391 | { | 340 | { |
| 392 | struct file *filp = iocb->ki_filp; | 341 | struct file *filp = iocb->ki_filp; |
| 393 | struct pipe_inode_info *pipe = filp->private_data; | 342 | struct pipe_inode_info *pipe = filp->private_data; |
| 394 | ssize_t ret; | 343 | ssize_t ret = 0; |
| 395 | int do_wakeup; | 344 | int do_wakeup = 0; |
| 396 | struct iovec *iov = (struct iovec *)_iov; | 345 | size_t total_len = iov_iter_count(from); |
| 397 | size_t total_len; | ||
| 398 | ssize_t chars; | 346 | ssize_t chars; |
| 399 | 347 | ||
| 400 | total_len = iov_length(iov, nr_segs); | ||
| 401 | /* Null write succeeds. */ | 348 | /* Null write succeeds. */ |
| 402 | if (unlikely(total_len == 0)) | 349 | if (unlikely(total_len == 0)) |
| 403 | return 0; | 350 | return 0; |
| 404 | 351 | ||
| 405 | do_wakeup = 0; | ||
| 406 | ret = 0; | ||
| 407 | __pipe_lock(pipe); | 352 | __pipe_lock(pipe); |
| 408 | 353 | ||
| 409 | if (!pipe->readers) { | 354 | if (!pipe->readers) { |
| @@ -422,38 +367,19 @@ pipe_write(struct kiocb *iocb, const struct iovec *_iov, | |||
| 422 | int offset = buf->offset + buf->len; | 367 | int offset = buf->offset + buf->len; |
| 423 | 368 | ||
| 424 | if (ops->can_merge && offset + chars <= PAGE_SIZE) { | 369 | if (ops->can_merge && offset + chars <= PAGE_SIZE) { |
| 425 | int error, atomic = 1; | 370 | int error = ops->confirm(pipe, buf); |
| 426 | void *addr; | ||
| 427 | |||
| 428 | error = ops->confirm(pipe, buf); | ||
| 429 | if (error) | 371 | if (error) |
| 430 | goto out; | 372 | goto out; |
| 431 | 373 | ||
| 432 | iov_fault_in_pages_read(iov, chars); | 374 | ret = copy_page_from_iter(buf->page, offset, chars, from); |
| 433 | redo1: | 375 | if (unlikely(ret < chars)) { |
| 434 | if (atomic) | 376 | error = -EFAULT; |
| 435 | addr = kmap_atomic(buf->page); | ||
| 436 | else | ||
| 437 | addr = kmap(buf->page); | ||
| 438 | error = pipe_iov_copy_from_user(offset + addr, iov, | ||
| 439 | chars, atomic); | ||
| 440 | if (atomic) | ||
| 441 | kunmap_atomic(addr); | ||
| 442 | else | ||
| 443 | kunmap(buf->page); | ||
| 444 | ret = error; | ||
| 445 | do_wakeup = 1; | ||
| 446 | if (error) { | ||
| 447 | if (atomic) { | ||
| 448 | atomic = 0; | ||
| 449 | goto redo1; | ||
| 450 | } | ||
| 451 | goto out; | 377 | goto out; |
| 452 | } | 378 | } |
| 379 | do_wakeup = 1; | ||
| 453 | buf->len += chars; | 380 | buf->len += chars; |
| 454 | total_len -= chars; | ||
| 455 | ret = chars; | 381 | ret = chars; |
| 456 | if (!total_len) | 382 | if (!iov_iter_count(from)) |
| 457 | goto out; | 383 | goto out; |
| 458 | } | 384 | } |
| 459 | } | 385 | } |
| @@ -472,8 +398,7 @@ redo1: | |||
| 472 | int newbuf = (pipe->curbuf + bufs) & (pipe->buffers-1); | 398 | int newbuf = (pipe->curbuf + bufs) & (pipe->buffers-1); |
| 473 | struct pipe_buffer *buf = pipe->bufs + newbuf; | 399 | struct pipe_buffer *buf = pipe->bufs + newbuf; |
| 474 | struct page *page = pipe->tmp_page; | 400 | struct page *page = pipe->tmp_page; |
| 475 | char *src; | 401 | int copied; |
| 476 | int error, atomic = 1; | ||
| 477 | 402 | ||
| 478 | if (!page) { | 403 | if (!page) { |
| 479 | page = alloc_page(GFP_HIGHUSER); | 404 | page = alloc_page(GFP_HIGHUSER); |
| @@ -489,40 +414,19 @@ redo1: | |||
| 489 | * FIXME! Is this really true? | 414 | * FIXME! Is this really true? |
| 490 | */ | 415 | */ |
| 491 | do_wakeup = 1; | 416 | do_wakeup = 1; |
| 492 | chars = PAGE_SIZE; | 417 | copied = copy_page_from_iter(page, 0, PAGE_SIZE, from); |
| 493 | if (chars > total_len) | 418 | if (unlikely(copied < PAGE_SIZE && iov_iter_count(from))) { |
| 494 | chars = total_len; | ||
| 495 | |||
| 496 | iov_fault_in_pages_read(iov, chars); | ||
| 497 | redo2: | ||
| 498 | if (atomic) | ||
| 499 | src = kmap_atomic(page); | ||
| 500 | else | ||
| 501 | src = kmap(page); | ||
| 502 | |||
| 503 | error = pipe_iov_copy_from_user(src, iov, chars, | ||
| 504 | atomic); | ||
| 505 | if (atomic) | ||
| 506 | kunmap_atomic(src); | ||
| 507 | else | ||
| 508 | kunmap(page); | ||
| 509 | |||
| 510 | if (unlikely(error)) { | ||
| 511 | if (atomic) { | ||
| 512 | atomic = 0; | ||
| 513 | goto redo2; | ||
| 514 | } | ||
| 515 | if (!ret) | 419 | if (!ret) |
| 516 | ret = error; | 420 | ret = -EFAULT; |
| 517 | break; | 421 | break; |
| 518 | } | 422 | } |
| 519 | ret += chars; | 423 | ret += copied; |
| 520 | 424 | ||
| 521 | /* Insert it into the buffer array */ | 425 | /* Insert it into the buffer array */ |
| 522 | buf->page = page; | 426 | buf->page = page; |
| 523 | buf->ops = &anon_pipe_buf_ops; | 427 | buf->ops = &anon_pipe_buf_ops; |
| 524 | buf->offset = 0; | 428 | buf->offset = 0; |
| 525 | buf->len = chars; | 429 | buf->len = copied; |
| 526 | buf->flags = 0; | 430 | buf->flags = 0; |
| 527 | if (is_packetized(filp)) { | 431 | if (is_packetized(filp)) { |
| 528 | buf->ops = &packet_pipe_buf_ops; | 432 | buf->ops = &packet_pipe_buf_ops; |
| @@ -531,8 +435,7 @@ redo2: | |||
| 531 | pipe->nrbufs = ++bufs; | 435 | pipe->nrbufs = ++bufs; |
| 532 | pipe->tmp_page = NULL; | 436 | pipe->tmp_page = NULL; |
| 533 | 437 | ||
| 534 | total_len -= chars; | 438 | if (!iov_iter_count(from)) |
| 535 | if (!total_len) | ||
| 536 | break; | 439 | break; |
| 537 | } | 440 | } |
| 538 | if (bufs < pipe->buffers) | 441 | if (bufs < pipe->buffers) |
| @@ -1044,10 +947,10 @@ err: | |||
| 1044 | const struct file_operations pipefifo_fops = { | 947 | const struct file_operations pipefifo_fops = { |
| 1045 | .open = fifo_open, | 948 | .open = fifo_open, |
| 1046 | .llseek = no_llseek, | 949 | .llseek = no_llseek, |
| 1047 | .read = do_sync_read, | 950 | .read = new_sync_read, |
| 1048 | .aio_read = pipe_read, | 951 | .read_iter = pipe_read, |
| 1049 | .write = do_sync_write, | 952 | .write = new_sync_write, |
| 1050 | .aio_write = pipe_write, | 953 | .write_iter = pipe_write, |
| 1051 | .poll = pipe_poll, | 954 | .poll = pipe_poll, |
| 1052 | .unlocked_ioctl = pipe_ioctl, | 955 | .unlocked_ioctl = pipe_ioctl, |
| 1053 | .release = pipe_release, | 956 | .release = pipe_release, |
diff --git a/fs/proc/stat.c b/fs/proc/stat.c index 9d231e9e5f0e..bf2d03f8fd3e 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c | |||
| @@ -184,29 +184,11 @@ static int show_stat(struct seq_file *p, void *v) | |||
| 184 | 184 | ||
| 185 | static int stat_open(struct inode *inode, struct file *file) | 185 | static int stat_open(struct inode *inode, struct file *file) |
| 186 | { | 186 | { |
| 187 | size_t size = 1024 + 128 * num_possible_cpus(); | 187 | size_t size = 1024 + 128 * num_online_cpus(); |
| 188 | char *buf; | ||
| 189 | struct seq_file *m; | ||
| 190 | int res; | ||
| 191 | 188 | ||
| 192 | /* minimum size to display an interrupt count : 2 bytes */ | 189 | /* minimum size to display an interrupt count : 2 bytes */ |
| 193 | size += 2 * nr_irqs; | 190 | size += 2 * nr_irqs; |
| 194 | 191 | return single_open_size(file, show_stat, NULL, size); | |
| 195 | /* don't ask for more than the kmalloc() max size */ | ||
| 196 | if (size > KMALLOC_MAX_SIZE) | ||
| 197 | size = KMALLOC_MAX_SIZE; | ||
| 198 | buf = kmalloc(size, GFP_KERNEL); | ||
| 199 | if (!buf) | ||
| 200 | return -ENOMEM; | ||
| 201 | |||
| 202 | res = single_open(file, show_stat, NULL); | ||
| 203 | if (!res) { | ||
| 204 | m = file->private_data; | ||
| 205 | m->buf = buf; | ||
| 206 | m->size = ksize(buf); | ||
| 207 | } else | ||
| 208 | kfree(buf); | ||
| 209 | return res; | ||
| 210 | } | 192 | } |
| 211 | 193 | ||
| 212 | static const struct file_operations proc_stat_operations = { | 194 | static const struct file_operations proc_stat_operations = { |
diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c index 1e56a4e8cf7c..4f56de822d2f 100644 --- a/fs/ramfs/file-mmu.c +++ b/fs/ramfs/file-mmu.c | |||
| @@ -31,14 +31,14 @@ | |||
| 31 | #include "internal.h" | 31 | #include "internal.h" |
| 32 | 32 | ||
| 33 | const struct file_operations ramfs_file_operations = { | 33 | const struct file_operations ramfs_file_operations = { |
| 34 | .read = do_sync_read, | 34 | .read = new_sync_read, |
| 35 | .aio_read = generic_file_aio_read, | 35 | .read_iter = generic_file_read_iter, |
| 36 | .write = do_sync_write, | 36 | .write = new_sync_write, |
| 37 | .aio_write = generic_file_aio_write, | 37 | .write_iter = generic_file_write_iter, |
| 38 | .mmap = generic_file_mmap, | 38 | .mmap = generic_file_mmap, |
| 39 | .fsync = noop_fsync, | 39 | .fsync = noop_fsync, |
| 40 | .splice_read = generic_file_splice_read, | 40 | .splice_read = generic_file_splice_read, |
| 41 | .splice_write = generic_file_splice_write, | 41 | .splice_write = iter_file_splice_write, |
| 42 | .llseek = generic_file_llseek, | 42 | .llseek = generic_file_llseek, |
| 43 | }; | 43 | }; |
| 44 | 44 | ||
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c index 0b3d8e4cb2fa..dda012ad4208 100644 --- a/fs/ramfs/file-nommu.c +++ b/fs/ramfs/file-nommu.c | |||
| @@ -37,13 +37,13 @@ static int ramfs_nommu_mmap(struct file *file, struct vm_area_struct *vma); | |||
| 37 | const struct file_operations ramfs_file_operations = { | 37 | const struct file_operations ramfs_file_operations = { |
| 38 | .mmap = ramfs_nommu_mmap, | 38 | .mmap = ramfs_nommu_mmap, |
| 39 | .get_unmapped_area = ramfs_nommu_get_unmapped_area, | 39 | .get_unmapped_area = ramfs_nommu_get_unmapped_area, |
| 40 | .read = do_sync_read, | 40 | .read = new_sync_read, |
| 41 | .aio_read = generic_file_aio_read, | 41 | .read_iter = generic_file_read_iter, |
| 42 | .write = do_sync_write, | 42 | .write = new_sync_write, |
| 43 | .aio_write = generic_file_aio_write, | 43 | .write_iter = generic_file_write_iter, |
| 44 | .fsync = noop_fsync, | 44 | .fsync = noop_fsync, |
| 45 | .splice_read = generic_file_splice_read, | 45 | .splice_read = generic_file_splice_read, |
| 46 | .splice_write = generic_file_splice_write, | 46 | .splice_write = iter_file_splice_write, |
| 47 | .llseek = generic_file_llseek, | 47 | .llseek = generic_file_llseek, |
| 48 | }; | 48 | }; |
| 49 | 49 | ||
diff --git a/fs/read_write.c b/fs/read_write.c index 31c6efa43183..009d8542a889 100644 --- a/fs/read_write.c +++ b/fs/read_write.c | |||
| @@ -25,11 +25,12 @@ | |||
| 25 | typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *); | 25 | typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *); |
| 26 | typedef ssize_t (*iov_fn_t)(struct kiocb *, const struct iovec *, | 26 | typedef ssize_t (*iov_fn_t)(struct kiocb *, const struct iovec *, |
| 27 | unsigned long, loff_t); | 27 | unsigned long, loff_t); |
| 28 | typedef ssize_t (*iter_fn_t)(struct kiocb *, struct iov_iter *); | ||
| 28 | 29 | ||
| 29 | const struct file_operations generic_ro_fops = { | 30 | const struct file_operations generic_ro_fops = { |
| 30 | .llseek = generic_file_llseek, | 31 | .llseek = generic_file_llseek, |
| 31 | .read = do_sync_read, | 32 | .read = new_sync_read, |
| 32 | .aio_read = generic_file_aio_read, | 33 | .read_iter = generic_file_read_iter, |
| 33 | .mmap = generic_file_readonly_mmap, | 34 | .mmap = generic_file_readonly_mmap, |
| 34 | .splice_read = generic_file_splice_read, | 35 | .splice_read = generic_file_splice_read, |
| 35 | }; | 36 | }; |
| @@ -390,13 +391,34 @@ ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *pp | |||
| 390 | 391 | ||
| 391 | EXPORT_SYMBOL(do_sync_read); | 392 | EXPORT_SYMBOL(do_sync_read); |
| 392 | 393 | ||
| 394 | ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos) | ||
| 395 | { | ||
| 396 | struct iovec iov = { .iov_base = buf, .iov_len = len }; | ||
| 397 | struct kiocb kiocb; | ||
| 398 | struct iov_iter iter; | ||
| 399 | ssize_t ret; | ||
| 400 | |||
| 401 | init_sync_kiocb(&kiocb, filp); | ||
| 402 | kiocb.ki_pos = *ppos; | ||
| 403 | kiocb.ki_nbytes = len; | ||
| 404 | iov_iter_init(&iter, READ, &iov, 1, len); | ||
| 405 | |||
| 406 | ret = filp->f_op->read_iter(&kiocb, &iter); | ||
| 407 | if (-EIOCBQUEUED == ret) | ||
| 408 | ret = wait_on_sync_kiocb(&kiocb); | ||
| 409 | *ppos = kiocb.ki_pos; | ||
| 410 | return ret; | ||
| 411 | } | ||
| 412 | |||
| 413 | EXPORT_SYMBOL(new_sync_read); | ||
| 414 | |||
| 393 | ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos) | 415 | ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos) |
| 394 | { | 416 | { |
| 395 | ssize_t ret; | 417 | ssize_t ret; |
| 396 | 418 | ||
| 397 | if (!(file->f_mode & FMODE_READ)) | 419 | if (!(file->f_mode & FMODE_READ)) |
| 398 | return -EBADF; | 420 | return -EBADF; |
| 399 | if (!file->f_op->read && !file->f_op->aio_read) | 421 | if (!(file->f_mode & FMODE_CAN_READ)) |
| 400 | return -EINVAL; | 422 | return -EINVAL; |
| 401 | if (unlikely(!access_ok(VERIFY_WRITE, buf, count))) | 423 | if (unlikely(!access_ok(VERIFY_WRITE, buf, count))) |
| 402 | return -EFAULT; | 424 | return -EFAULT; |
| @@ -406,8 +428,10 @@ ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos) | |||
| 406 | count = ret; | 428 | count = ret; |
| 407 | if (file->f_op->read) | 429 | if (file->f_op->read) |
| 408 | ret = file->f_op->read(file, buf, count, pos); | 430 | ret = file->f_op->read(file, buf, count, pos); |
| 409 | else | 431 | else if (file->f_op->aio_read) |
| 410 | ret = do_sync_read(file, buf, count, pos); | 432 | ret = do_sync_read(file, buf, count, pos); |
| 433 | else | ||
| 434 | ret = new_sync_read(file, buf, count, pos); | ||
| 411 | if (ret > 0) { | 435 | if (ret > 0) { |
| 412 | fsnotify_access(file); | 436 | fsnotify_access(file); |
| 413 | add_rchar(current, ret); | 437 | add_rchar(current, ret); |
| @@ -439,13 +463,34 @@ ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, lof | |||
| 439 | 463 | ||
| 440 | EXPORT_SYMBOL(do_sync_write); | 464 | EXPORT_SYMBOL(do_sync_write); |
| 441 | 465 | ||
| 466 | ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos) | ||
| 467 | { | ||
| 468 | struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len }; | ||
| 469 | struct kiocb kiocb; | ||
| 470 | struct iov_iter iter; | ||
| 471 | ssize_t ret; | ||
| 472 | |||
| 473 | init_sync_kiocb(&kiocb, filp); | ||
| 474 | kiocb.ki_pos = *ppos; | ||
| 475 | kiocb.ki_nbytes = len; | ||
| 476 | iov_iter_init(&iter, WRITE, &iov, 1, len); | ||
| 477 | |||
| 478 | ret = filp->f_op->write_iter(&kiocb, &iter); | ||
| 479 | if (-EIOCBQUEUED == ret) | ||
| 480 | ret = wait_on_sync_kiocb(&kiocb); | ||
| 481 | *ppos = kiocb.ki_pos; | ||
| 482 | return ret; | ||
| 483 | } | ||
| 484 | |||
| 485 | EXPORT_SYMBOL(new_sync_write); | ||
| 486 | |||
| 442 | ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t *pos) | 487 | ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t *pos) |
| 443 | { | 488 | { |
| 444 | mm_segment_t old_fs; | 489 | mm_segment_t old_fs; |
| 445 | const char __user *p; | 490 | const char __user *p; |
| 446 | ssize_t ret; | 491 | ssize_t ret; |
| 447 | 492 | ||
| 448 | if (!file->f_op->write && !file->f_op->aio_write) | 493 | if (!(file->f_mode & FMODE_CAN_WRITE)) |
| 449 | return -EINVAL; | 494 | return -EINVAL; |
| 450 | 495 | ||
| 451 | old_fs = get_fs(); | 496 | old_fs = get_fs(); |
| @@ -455,8 +500,10 @@ ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t | |||
| 455 | count = MAX_RW_COUNT; | 500 | count = MAX_RW_COUNT; |
| 456 | if (file->f_op->write) | 501 | if (file->f_op->write) |
| 457 | ret = file->f_op->write(file, p, count, pos); | 502 | ret = file->f_op->write(file, p, count, pos); |
| 458 | else | 503 | else if (file->f_op->aio_write) |
| 459 | ret = do_sync_write(file, p, count, pos); | 504 | ret = do_sync_write(file, p, count, pos); |
| 505 | else | ||
| 506 | ret = new_sync_write(file, p, count, pos); | ||
| 460 | set_fs(old_fs); | 507 | set_fs(old_fs); |
| 461 | if (ret > 0) { | 508 | if (ret > 0) { |
| 462 | fsnotify_modify(file); | 509 | fsnotify_modify(file); |
| @@ -472,7 +519,7 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_ | |||
| 472 | 519 | ||
| 473 | if (!(file->f_mode & FMODE_WRITE)) | 520 | if (!(file->f_mode & FMODE_WRITE)) |
| 474 | return -EBADF; | 521 | return -EBADF; |
| 475 | if (!file->f_op->write && !file->f_op->aio_write) | 522 | if (!(file->f_mode & FMODE_CAN_WRITE)) |
| 476 | return -EINVAL; | 523 | return -EINVAL; |
| 477 | if (unlikely(!access_ok(VERIFY_READ, buf, count))) | 524 | if (unlikely(!access_ok(VERIFY_READ, buf, count))) |
| 478 | return -EFAULT; | 525 | return -EFAULT; |
| @@ -483,8 +530,10 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_ | |||
| 483 | file_start_write(file); | 530 | file_start_write(file); |
| 484 | if (file->f_op->write) | 531 | if (file->f_op->write) |
| 485 | ret = file->f_op->write(file, buf, count, pos); | 532 | ret = file->f_op->write(file, buf, count, pos); |
| 486 | else | 533 | else if (file->f_op->aio_write) |
| 487 | ret = do_sync_write(file, buf, count, pos); | 534 | ret = do_sync_write(file, buf, count, pos); |
| 535 | else | ||
| 536 | ret = new_sync_write(file, buf, count, pos); | ||
| 488 | if (ret > 0) { | 537 | if (ret > 0) { |
| 489 | fsnotify_modify(file); | 538 | fsnotify_modify(file); |
| 490 | add_wchar(current, ret); | 539 | add_wchar(current, ret); |
| @@ -601,6 +650,25 @@ unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to) | |||
| 601 | } | 650 | } |
| 602 | EXPORT_SYMBOL(iov_shorten); | 651 | EXPORT_SYMBOL(iov_shorten); |
| 603 | 652 | ||
| 653 | static ssize_t do_iter_readv_writev(struct file *filp, int rw, const struct iovec *iov, | ||
| 654 | unsigned long nr_segs, size_t len, loff_t *ppos, iter_fn_t fn) | ||
| 655 | { | ||
| 656 | struct kiocb kiocb; | ||
| 657 | struct iov_iter iter; | ||
| 658 | ssize_t ret; | ||
| 659 | |||
| 660 | init_sync_kiocb(&kiocb, filp); | ||
| 661 | kiocb.ki_pos = *ppos; | ||
| 662 | kiocb.ki_nbytes = len; | ||
| 663 | |||
| 664 | iov_iter_init(&iter, rw, iov, nr_segs, len); | ||
| 665 | ret = fn(&kiocb, &iter); | ||
| 666 | if (ret == -EIOCBQUEUED) | ||
| 667 | ret = wait_on_sync_kiocb(&kiocb); | ||
| 668 | *ppos = kiocb.ki_pos; | ||
| 669 | return ret; | ||
| 670 | } | ||
| 671 | |||
| 604 | static ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov, | 672 | static ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov, |
| 605 | unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn) | 673 | unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn) |
| 606 | { | 674 | { |
| @@ -738,6 +806,7 @@ static ssize_t do_readv_writev(int type, struct file *file, | |||
| 738 | ssize_t ret; | 806 | ssize_t ret; |
| 739 | io_fn_t fn; | 807 | io_fn_t fn; |
| 740 | iov_fn_t fnv; | 808 | iov_fn_t fnv; |
| 809 | iter_fn_t iter_fn; | ||
| 741 | 810 | ||
| 742 | ret = rw_copy_check_uvector(type, uvector, nr_segs, | 811 | ret = rw_copy_check_uvector(type, uvector, nr_segs, |
| 743 | ARRAY_SIZE(iovstack), iovstack, &iov); | 812 | ARRAY_SIZE(iovstack), iovstack, &iov); |
| @@ -753,13 +822,18 @@ static ssize_t do_readv_writev(int type, struct file *file, | |||
| 753 | if (type == READ) { | 822 | if (type == READ) { |
| 754 | fn = file->f_op->read; | 823 | fn = file->f_op->read; |
| 755 | fnv = file->f_op->aio_read; | 824 | fnv = file->f_op->aio_read; |
| 825 | iter_fn = file->f_op->read_iter; | ||
| 756 | } else { | 826 | } else { |
| 757 | fn = (io_fn_t)file->f_op->write; | 827 | fn = (io_fn_t)file->f_op->write; |
| 758 | fnv = file->f_op->aio_write; | 828 | fnv = file->f_op->aio_write; |
| 829 | iter_fn = file->f_op->write_iter; | ||
| 759 | file_start_write(file); | 830 | file_start_write(file); |
| 760 | } | 831 | } |
| 761 | 832 | ||
| 762 | if (fnv) | 833 | if (iter_fn) |
| 834 | ret = do_iter_readv_writev(file, type, iov, nr_segs, tot_len, | ||
| 835 | pos, iter_fn); | ||
| 836 | else if (fnv) | ||
| 763 | ret = do_sync_readv_writev(file, iov, nr_segs, tot_len, | 837 | ret = do_sync_readv_writev(file, iov, nr_segs, tot_len, |
| 764 | pos, fnv); | 838 | pos, fnv); |
| 765 | else | 839 | else |
| @@ -785,7 +859,7 @@ ssize_t vfs_readv(struct file *file, const struct iovec __user *vec, | |||
| 785 | { | 859 | { |
| 786 | if (!(file->f_mode & FMODE_READ)) | 860 | if (!(file->f_mode & FMODE_READ)) |
| 787 | return -EBADF; | 861 | return -EBADF; |
| 788 | if (!file->f_op->aio_read && !file->f_op->read) | 862 | if (!(file->f_mode & FMODE_CAN_READ)) |
| 789 | return -EINVAL; | 863 | return -EINVAL; |
| 790 | 864 | ||
| 791 | return do_readv_writev(READ, file, vec, vlen, pos); | 865 | return do_readv_writev(READ, file, vec, vlen, pos); |
| @@ -798,7 +872,7 @@ ssize_t vfs_writev(struct file *file, const struct iovec __user *vec, | |||
| 798 | { | 872 | { |
| 799 | if (!(file->f_mode & FMODE_WRITE)) | 873 | if (!(file->f_mode & FMODE_WRITE)) |
| 800 | return -EBADF; | 874 | return -EBADF; |
| 801 | if (!file->f_op->aio_write && !file->f_op->write) | 875 | if (!(file->f_mode & FMODE_CAN_WRITE)) |
| 802 | return -EINVAL; | 876 | return -EINVAL; |
| 803 | 877 | ||
| 804 | return do_readv_writev(WRITE, file, vec, vlen, pos); | 878 | return do_readv_writev(WRITE, file, vec, vlen, pos); |
| @@ -912,6 +986,7 @@ static ssize_t compat_do_readv_writev(int type, struct file *file, | |||
| 912 | ssize_t ret; | 986 | ssize_t ret; |
| 913 | io_fn_t fn; | 987 | io_fn_t fn; |
| 914 | iov_fn_t fnv; | 988 | iov_fn_t fnv; |
| 989 | iter_fn_t iter_fn; | ||
| 915 | 990 | ||
| 916 | ret = compat_rw_copy_check_uvector(type, uvector, nr_segs, | 991 | ret = compat_rw_copy_check_uvector(type, uvector, nr_segs, |
| 917 | UIO_FASTIOV, iovstack, &iov); | 992 | UIO_FASTIOV, iovstack, &iov); |
| @@ -927,13 +1002,18 @@ static ssize_t compat_do_readv_writev(int type, struct file *file, | |||
| 927 | if (type == READ) { | 1002 | if (type == READ) { |
| 928 | fn = file->f_op->read; | 1003 | fn = file->f_op->read; |
| 929 | fnv = file->f_op->aio_read; | 1004 | fnv = file->f_op->aio_read; |
| 1005 | iter_fn = file->f_op->read_iter; | ||
| 930 | } else { | 1006 | } else { |
| 931 | fn = (io_fn_t)file->f_op->write; | 1007 | fn = (io_fn_t)file->f_op->write; |
| 932 | fnv = file->f_op->aio_write; | 1008 | fnv = file->f_op->aio_write; |
| 1009 | iter_fn = file->f_op->write_iter; | ||
| 933 | file_start_write(file); | 1010 | file_start_write(file); |
| 934 | } | 1011 | } |
| 935 | 1012 | ||
| 936 | if (fnv) | 1013 | if (iter_fn) |
| 1014 | ret = do_iter_readv_writev(file, type, iov, nr_segs, tot_len, | ||
| 1015 | pos, iter_fn); | ||
| 1016 | else if (fnv) | ||
| 937 | ret = do_sync_readv_writev(file, iov, nr_segs, tot_len, | 1017 | ret = do_sync_readv_writev(file, iov, nr_segs, tot_len, |
| 938 | pos, fnv); | 1018 | pos, fnv); |
| 939 | else | 1019 | else |
| @@ -964,7 +1044,7 @@ static size_t compat_readv(struct file *file, | |||
| 964 | goto out; | 1044 | goto out; |
| 965 | 1045 | ||
| 966 | ret = -EINVAL; | 1046 | ret = -EINVAL; |
| 967 | if (!file->f_op->aio_read && !file->f_op->read) | 1047 | if (!(file->f_mode & FMODE_CAN_READ)) |
| 968 | goto out; | 1048 | goto out; |
| 969 | 1049 | ||
| 970 | ret = compat_do_readv_writev(READ, file, vec, vlen, pos); | 1050 | ret = compat_do_readv_writev(READ, file, vec, vlen, pos); |
| @@ -1041,7 +1121,7 @@ static size_t compat_writev(struct file *file, | |||
| 1041 | goto out; | 1121 | goto out; |
| 1042 | 1122 | ||
| 1043 | ret = -EINVAL; | 1123 | ret = -EINVAL; |
| 1044 | if (!file->f_op->aio_write && !file->f_op->write) | 1124 | if (!(file->f_mode & FMODE_CAN_WRITE)) |
| 1045 | goto out; | 1125 | goto out; |
| 1046 | 1126 | ||
| 1047 | ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos); | 1127 | ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos); |
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index 5f6c32c668b6..db9e80ba53a0 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c | |||
| @@ -243,8 +243,8 @@ drop_write_lock: | |||
| 243 | } | 243 | } |
| 244 | 244 | ||
| 245 | const struct file_operations reiserfs_file_operations = { | 245 | const struct file_operations reiserfs_file_operations = { |
| 246 | .read = do_sync_read, | 246 | .read = new_sync_read, |
| 247 | .write = do_sync_write, | 247 | .write = new_sync_write, |
| 248 | .unlocked_ioctl = reiserfs_ioctl, | 248 | .unlocked_ioctl = reiserfs_ioctl, |
| 249 | #ifdef CONFIG_COMPAT | 249 | #ifdef CONFIG_COMPAT |
| 250 | .compat_ioctl = reiserfs_compat_ioctl, | 250 | .compat_ioctl = reiserfs_compat_ioctl, |
| @@ -253,10 +253,10 @@ const struct file_operations reiserfs_file_operations = { | |||
| 253 | .open = reiserfs_file_open, | 253 | .open = reiserfs_file_open, |
| 254 | .release = reiserfs_file_release, | 254 | .release = reiserfs_file_release, |
| 255 | .fsync = reiserfs_sync_file, | 255 | .fsync = reiserfs_sync_file, |
| 256 | .aio_read = generic_file_aio_read, | 256 | .read_iter = generic_file_read_iter, |
| 257 | .aio_write = generic_file_aio_write, | 257 | .write_iter = generic_file_write_iter, |
| 258 | .splice_read = generic_file_splice_read, | 258 | .splice_read = generic_file_splice_read, |
| 259 | .splice_write = generic_file_splice_write, | 259 | .splice_write = iter_file_splice_write, |
| 260 | .llseek = generic_file_llseek, | 260 | .llseek = generic_file_llseek, |
| 261 | }; | 261 | }; |
| 262 | 262 | ||
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index e3ca04894919..63b2b0ec49e6 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c | |||
| @@ -3279,15 +3279,15 @@ static int reiserfs_releasepage(struct page *page, gfp_t unused_gfp_flags) | |||
| 3279 | * to do in this section of the code. | 3279 | * to do in this section of the code. |
| 3280 | */ | 3280 | */ |
| 3281 | static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb, | 3281 | static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb, |
| 3282 | const struct iovec *iov, loff_t offset, | 3282 | struct iov_iter *iter, loff_t offset) |
| 3283 | unsigned long nr_segs) | ||
| 3284 | { | 3283 | { |
| 3285 | struct file *file = iocb->ki_filp; | 3284 | struct file *file = iocb->ki_filp; |
| 3286 | struct inode *inode = file->f_mapping->host; | 3285 | struct inode *inode = file->f_mapping->host; |
| 3286 | size_t count = iov_iter_count(iter); | ||
| 3287 | ssize_t ret; | 3287 | ssize_t ret; |
| 3288 | 3288 | ||
| 3289 | ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, | 3289 | ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, |
| 3290 | reiserfs_get_blocks_direct_io); | 3290 | reiserfs_get_blocks_direct_io); |
| 3291 | 3291 | ||
| 3292 | /* | 3292 | /* |
| 3293 | * In case of error extending write may have instantiated a few | 3293 | * In case of error extending write may have instantiated a few |
| @@ -3295,7 +3295,7 @@ static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb, | |||
| 3295 | */ | 3295 | */ |
| 3296 | if (unlikely((rw & WRITE) && ret < 0)) { | 3296 | if (unlikely((rw & WRITE) && ret < 0)) { |
| 3297 | loff_t isize = i_size_read(inode); | 3297 | loff_t isize = i_size_read(inode); |
| 3298 | loff_t end = offset + iov_length(iov, nr_segs); | 3298 | loff_t end = offset + count; |
| 3299 | 3299 | ||
| 3300 | if ((end > isize) && inode_newsize_ok(inode, isize) == 0) { | 3300 | if ((end > isize) && inode_newsize_ok(inode, isize) == 0) { |
| 3301 | truncate_setsize(inode, isize); | 3301 | truncate_setsize(inode, isize); |
diff --git a/fs/romfs/mmap-nommu.c b/fs/romfs/mmap-nommu.c index f373bde8f545..ea06c7554860 100644 --- a/fs/romfs/mmap-nommu.c +++ b/fs/romfs/mmap-nommu.c | |||
| @@ -72,8 +72,8 @@ static int romfs_mmap(struct file *file, struct vm_area_struct *vma) | |||
| 72 | 72 | ||
| 73 | const struct file_operations romfs_ro_fops = { | 73 | const struct file_operations romfs_ro_fops = { |
| 74 | .llseek = generic_file_llseek, | 74 | .llseek = generic_file_llseek, |
| 75 | .read = do_sync_read, | 75 | .read = new_sync_read, |
| 76 | .aio_read = generic_file_aio_read, | 76 | .read_iter = generic_file_read_iter, |
| 77 | .splice_read = generic_file_splice_read, | 77 | .splice_read = generic_file_splice_read, |
| 78 | .mmap = romfs_mmap, | 78 | .mmap = romfs_mmap, |
| 79 | .get_unmapped_area = romfs_get_unmapped_area, | 79 | .get_unmapped_area = romfs_get_unmapped_area, |
diff --git a/fs/seq_file.c b/fs/seq_file.c index 1d641bb108d2..3857b720cb1b 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c | |||
| @@ -8,8 +8,10 @@ | |||
| 8 | #include <linux/fs.h> | 8 | #include <linux/fs.h> |
| 9 | #include <linux/export.h> | 9 | #include <linux/export.h> |
| 10 | #include <linux/seq_file.h> | 10 | #include <linux/seq_file.h> |
| 11 | #include <linux/vmalloc.h> | ||
| 11 | #include <linux/slab.h> | 12 | #include <linux/slab.h> |
| 12 | #include <linux/cred.h> | 13 | #include <linux/cred.h> |
| 14 | #include <linux/mm.h> | ||
| 13 | 15 | ||
| 14 | #include <asm/uaccess.h> | 16 | #include <asm/uaccess.h> |
| 15 | #include <asm/page.h> | 17 | #include <asm/page.h> |
| @@ -30,6 +32,16 @@ static void seq_set_overflow(struct seq_file *m) | |||
| 30 | m->count = m->size; | 32 | m->count = m->size; |
| 31 | } | 33 | } |
| 32 | 34 | ||
| 35 | static void *seq_buf_alloc(unsigned long size) | ||
| 36 | { | ||
| 37 | void *buf; | ||
| 38 | |||
| 39 | buf = kmalloc(size, GFP_KERNEL | __GFP_NOWARN); | ||
| 40 | if (!buf && size > PAGE_SIZE) | ||
| 41 | buf = vmalloc(size); | ||
| 42 | return buf; | ||
| 43 | } | ||
| 44 | |||
| 33 | /** | 45 | /** |
| 34 | * seq_open - initialize sequential file | 46 | * seq_open - initialize sequential file |
| 35 | * @file: file we initialize | 47 | * @file: file we initialize |
| @@ -96,7 +108,7 @@ static int traverse(struct seq_file *m, loff_t offset) | |||
| 96 | return 0; | 108 | return 0; |
| 97 | } | 109 | } |
| 98 | if (!m->buf) { | 110 | if (!m->buf) { |
| 99 | m->buf = kmalloc(m->size = PAGE_SIZE, GFP_KERNEL); | 111 | m->buf = seq_buf_alloc(m->size = PAGE_SIZE); |
| 100 | if (!m->buf) | 112 | if (!m->buf) |
| 101 | return -ENOMEM; | 113 | return -ENOMEM; |
| 102 | } | 114 | } |
| @@ -135,9 +147,9 @@ static int traverse(struct seq_file *m, loff_t offset) | |||
| 135 | 147 | ||
| 136 | Eoverflow: | 148 | Eoverflow: |
| 137 | m->op->stop(m, p); | 149 | m->op->stop(m, p); |
| 138 | kfree(m->buf); | 150 | kvfree(m->buf); |
| 139 | m->count = 0; | 151 | m->count = 0; |
| 140 | m->buf = kmalloc(m->size <<= 1, GFP_KERNEL); | 152 | m->buf = seq_buf_alloc(m->size <<= 1); |
| 141 | return !m->buf ? -ENOMEM : -EAGAIN; | 153 | return !m->buf ? -ENOMEM : -EAGAIN; |
| 142 | } | 154 | } |
| 143 | 155 | ||
| @@ -192,7 +204,7 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) | |||
| 192 | 204 | ||
| 193 | /* grab buffer if we didn't have one */ | 205 | /* grab buffer if we didn't have one */ |
| 194 | if (!m->buf) { | 206 | if (!m->buf) { |
| 195 | m->buf = kmalloc(m->size = PAGE_SIZE, GFP_KERNEL); | 207 | m->buf = seq_buf_alloc(m->size = PAGE_SIZE); |
| 196 | if (!m->buf) | 208 | if (!m->buf) |
| 197 | goto Enomem; | 209 | goto Enomem; |
| 198 | } | 210 | } |
| @@ -232,9 +244,9 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) | |||
| 232 | if (m->count < m->size) | 244 | if (m->count < m->size) |
| 233 | goto Fill; | 245 | goto Fill; |
| 234 | m->op->stop(m, p); | 246 | m->op->stop(m, p); |
| 235 | kfree(m->buf); | 247 | kvfree(m->buf); |
| 236 | m->count = 0; | 248 | m->count = 0; |
| 237 | m->buf = kmalloc(m->size <<= 1, GFP_KERNEL); | 249 | m->buf = seq_buf_alloc(m->size <<= 1); |
| 238 | if (!m->buf) | 250 | if (!m->buf) |
| 239 | goto Enomem; | 251 | goto Enomem; |
| 240 | m->version = 0; | 252 | m->version = 0; |
| @@ -350,7 +362,7 @@ EXPORT_SYMBOL(seq_lseek); | |||
| 350 | int seq_release(struct inode *inode, struct file *file) | 362 | int seq_release(struct inode *inode, struct file *file) |
| 351 | { | 363 | { |
| 352 | struct seq_file *m = file->private_data; | 364 | struct seq_file *m = file->private_data; |
| 353 | kfree(m->buf); | 365 | kvfree(m->buf); |
| 354 | kfree(m); | 366 | kfree(m); |
| 355 | return 0; | 367 | return 0; |
| 356 | } | 368 | } |
| @@ -605,13 +617,13 @@ EXPORT_SYMBOL(single_open); | |||
| 605 | int single_open_size(struct file *file, int (*show)(struct seq_file *, void *), | 617 | int single_open_size(struct file *file, int (*show)(struct seq_file *, void *), |
| 606 | void *data, size_t size) | 618 | void *data, size_t size) |
| 607 | { | 619 | { |
| 608 | char *buf = kmalloc(size, GFP_KERNEL); | 620 | char *buf = seq_buf_alloc(size); |
| 609 | int ret; | 621 | int ret; |
| 610 | if (!buf) | 622 | if (!buf) |
| 611 | return -ENOMEM; | 623 | return -ENOMEM; |
| 612 | ret = single_open(file, show, data); | 624 | ret = single_open(file, show, data); |
| 613 | if (ret) { | 625 | if (ret) { |
| 614 | kfree(buf); | 626 | kvfree(buf); |
| 615 | return ret; | 627 | return ret; |
| 616 | } | 628 | } |
| 617 | ((struct seq_file *)file->private_data)->buf = buf; | 629 | ((struct seq_file *)file->private_data)->buf = buf; |
diff --git a/fs/splice.c b/fs/splice.c index e246954ea48c..f5cb9ba84510 100644 --- a/fs/splice.c +++ b/fs/splice.c | |||
| @@ -32,6 +32,7 @@ | |||
| 32 | #include <linux/gfp.h> | 32 | #include <linux/gfp.h> |
| 33 | #include <linux/socket.h> | 33 | #include <linux/socket.h> |
| 34 | #include <linux/compat.h> | 34 | #include <linux/compat.h> |
| 35 | #include <linux/aio.h> | ||
| 35 | #include "internal.h" | 36 | #include "internal.h" |
| 36 | 37 | ||
| 37 | /* | 38 | /* |
| @@ -717,63 +718,6 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe, | |||
| 717 | sd->len, &pos, more); | 718 | sd->len, &pos, more); |
| 718 | } | 719 | } |
| 719 | 720 | ||
| 720 | /* | ||
| 721 | * This is a little more tricky than the file -> pipe splicing. There are | ||
| 722 | * basically three cases: | ||
| 723 | * | ||
| 724 | * - Destination page already exists in the address space and there | ||
| 725 | * are users of it. For that case we have no other option that | ||
| 726 | * copying the data. Tough luck. | ||
| 727 | * - Destination page already exists in the address space, but there | ||
| 728 | * are no users of it. Make sure it's uptodate, then drop it. Fall | ||
| 729 | * through to last case. | ||
| 730 | * - Destination page does not exist, we can add the pipe page to | ||
| 731 | * the page cache and avoid the copy. | ||
| 732 | * | ||
| 733 | * If asked to move pages to the output file (SPLICE_F_MOVE is set in | ||
| 734 | * sd->flags), we attempt to migrate pages from the pipe to the output | ||
| 735 | * file address space page cache. This is possible if no one else has | ||
| 736 | * the pipe page referenced outside of the pipe and page cache. If | ||
| 737 | * SPLICE_F_MOVE isn't set, or we cannot move the page, we simply create | ||
| 738 | * a new page in the output file page cache and fill/dirty that. | ||
| 739 | */ | ||
| 740 | int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf, | ||
| 741 | struct splice_desc *sd) | ||
| 742 | { | ||
| 743 | struct file *file = sd->u.file; | ||
| 744 | struct address_space *mapping = file->f_mapping; | ||
| 745 | unsigned int offset, this_len; | ||
| 746 | struct page *page; | ||
| 747 | void *fsdata; | ||
| 748 | int ret; | ||
| 749 | |||
| 750 | offset = sd->pos & ~PAGE_CACHE_MASK; | ||
| 751 | |||
| 752 | this_len = sd->len; | ||
| 753 | if (this_len + offset > PAGE_CACHE_SIZE) | ||
| 754 | this_len = PAGE_CACHE_SIZE - offset; | ||
| 755 | |||
| 756 | ret = pagecache_write_begin(file, mapping, sd->pos, this_len, | ||
| 757 | AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata); | ||
| 758 | if (unlikely(ret)) | ||
| 759 | goto out; | ||
| 760 | |||
| 761 | if (buf->page != page) { | ||
| 762 | char *src = kmap_atomic(buf->page); | ||
| 763 | char *dst = kmap_atomic(page); | ||
| 764 | |||
| 765 | memcpy(dst + offset, src + buf->offset, this_len); | ||
| 766 | flush_dcache_page(page); | ||
| 767 | kunmap_atomic(dst); | ||
| 768 | kunmap_atomic(src); | ||
| 769 | } | ||
| 770 | ret = pagecache_write_end(file, mapping, sd->pos, this_len, this_len, | ||
| 771 | page, fsdata); | ||
| 772 | out: | ||
| 773 | return ret; | ||
| 774 | } | ||
| 775 | EXPORT_SYMBOL(pipe_to_file); | ||
| 776 | |||
| 777 | static void wakeup_pipe_writers(struct pipe_inode_info *pipe) | 721 | static void wakeup_pipe_writers(struct pipe_inode_info *pipe) |
| 778 | { | 722 | { |
| 779 | smp_mb(); | 723 | smp_mb(); |
| @@ -802,7 +746,7 @@ static void wakeup_pipe_writers(struct pipe_inode_info *pipe) | |||
| 802 | * locking is required around copying the pipe buffers to the | 746 | * locking is required around copying the pipe buffers to the |
| 803 | * destination. | 747 | * destination. |
| 804 | */ | 748 | */ |
| 805 | int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd, | 749 | static int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd, |
| 806 | splice_actor *actor) | 750 | splice_actor *actor) |
| 807 | { | 751 | { |
| 808 | int ret; | 752 | int ret; |
| @@ -849,7 +793,6 @@ int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd, | |||
| 849 | 793 | ||
| 850 | return 1; | 794 | return 1; |
| 851 | } | 795 | } |
| 852 | EXPORT_SYMBOL(splice_from_pipe_feed); | ||
| 853 | 796 | ||
| 854 | /** | 797 | /** |
| 855 | * splice_from_pipe_next - wait for some data to splice from | 798 | * splice_from_pipe_next - wait for some data to splice from |
| @@ -861,7 +804,7 @@ EXPORT_SYMBOL(splice_from_pipe_feed); | |||
| 861 | * value (one) if pipe buffers are available. It will return zero | 804 | * value (one) if pipe buffers are available. It will return zero |
| 862 | * or -errno if no more data needs to be spliced. | 805 | * or -errno if no more data needs to be spliced. |
| 863 | */ | 806 | */ |
| 864 | int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_desc *sd) | 807 | static int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_desc *sd) |
| 865 | { | 808 | { |
| 866 | while (!pipe->nrbufs) { | 809 | while (!pipe->nrbufs) { |
| 867 | if (!pipe->writers) | 810 | if (!pipe->writers) |
| @@ -886,7 +829,6 @@ int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_desc *sd) | |||
| 886 | 829 | ||
| 887 | return 1; | 830 | return 1; |
| 888 | } | 831 | } |
| 889 | EXPORT_SYMBOL(splice_from_pipe_next); | ||
| 890 | 832 | ||
| 891 | /** | 833 | /** |
| 892 | * splice_from_pipe_begin - start splicing from pipe | 834 | * splice_from_pipe_begin - start splicing from pipe |
| @@ -897,12 +839,11 @@ EXPORT_SYMBOL(splice_from_pipe_next); | |||
| 897 | * splice_from_pipe_next() and splice_from_pipe_feed() to | 839 | * splice_from_pipe_next() and splice_from_pipe_feed() to |
| 898 | * initialize the necessary fields of @sd. | 840 | * initialize the necessary fields of @sd. |
| 899 | */ | 841 | */ |
| 900 | void splice_from_pipe_begin(struct splice_desc *sd) | 842 | static void splice_from_pipe_begin(struct splice_desc *sd) |
| 901 | { | 843 | { |
| 902 | sd->num_spliced = 0; | 844 | sd->num_spliced = 0; |
| 903 | sd->need_wakeup = false; | 845 | sd->need_wakeup = false; |
| 904 | } | 846 | } |
| 905 | EXPORT_SYMBOL(splice_from_pipe_begin); | ||
| 906 | 847 | ||
| 907 | /** | 848 | /** |
| 908 | * splice_from_pipe_end - finish splicing from pipe | 849 | * splice_from_pipe_end - finish splicing from pipe |
| @@ -914,12 +855,11 @@ EXPORT_SYMBOL(splice_from_pipe_begin); | |||
| 914 | * be called after a loop containing splice_from_pipe_next() and | 855 | * be called after a loop containing splice_from_pipe_next() and |
| 915 | * splice_from_pipe_feed(). | 856 | * splice_from_pipe_feed(). |
| 916 | */ | 857 | */ |
| 917 | void splice_from_pipe_end(struct pipe_inode_info *pipe, struct splice_desc *sd) | 858 | static void splice_from_pipe_end(struct pipe_inode_info *pipe, struct splice_desc *sd) |
| 918 | { | 859 | { |
| 919 | if (sd->need_wakeup) | 860 | if (sd->need_wakeup) |
| 920 | wakeup_pipe_writers(pipe); | 861 | wakeup_pipe_writers(pipe); |
| 921 | } | 862 | } |
| 922 | EXPORT_SYMBOL(splice_from_pipe_end); | ||
| 923 | 863 | ||
| 924 | /** | 864 | /** |
| 925 | * __splice_from_pipe - splice data from a pipe to given actor | 865 | * __splice_from_pipe - splice data from a pipe to given actor |
| @@ -985,7 +925,7 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, | |||
| 985 | } | 925 | } |
| 986 | 926 | ||
| 987 | /** | 927 | /** |
| 988 | * generic_file_splice_write - splice data from a pipe to a file | 928 | * iter_file_splice_write - splice data from a pipe to a file |
| 989 | * @pipe: pipe info | 929 | * @pipe: pipe info |
| 990 | * @out: file to write to | 930 | * @out: file to write to |
| 991 | * @ppos: position in @out | 931 | * @ppos: position in @out |
| @@ -995,40 +935,122 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, | |||
| 995 | * Description: | 935 | * Description: |
| 996 | * Will either move or copy pages (determined by @flags options) from | 936 | * Will either move or copy pages (determined by @flags options) from |
| 997 | * the given pipe inode to the given file. | 937 | * the given pipe inode to the given file. |
| 938 | * This one is ->write_iter-based. | ||
| 998 | * | 939 | * |
| 999 | */ | 940 | */ |
| 1000 | ssize_t | 941 | ssize_t |
| 1001 | generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, | 942 | iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out, |
| 1002 | loff_t *ppos, size_t len, unsigned int flags) | 943 | loff_t *ppos, size_t len, unsigned int flags) |
| 1003 | { | 944 | { |
| 1004 | struct address_space *mapping = out->f_mapping; | ||
| 1005 | struct inode *inode = mapping->host; | ||
| 1006 | struct splice_desc sd = { | 945 | struct splice_desc sd = { |
| 1007 | .total_len = len, | 946 | .total_len = len, |
| 1008 | .flags = flags, | 947 | .flags = flags, |
| 1009 | .pos = *ppos, | 948 | .pos = *ppos, |
| 1010 | .u.file = out, | 949 | .u.file = out, |
| 1011 | }; | 950 | }; |
| 951 | int nbufs = pipe->buffers; | ||
| 952 | struct bio_vec *array = kcalloc(nbufs, sizeof(struct bio_vec), | ||
| 953 | GFP_KERNEL); | ||
| 1012 | ssize_t ret; | 954 | ssize_t ret; |
| 1013 | 955 | ||
| 956 | if (unlikely(!array)) | ||
| 957 | return -ENOMEM; | ||
| 958 | |||
| 1014 | pipe_lock(pipe); | 959 | pipe_lock(pipe); |
| 1015 | 960 | ||
| 1016 | splice_from_pipe_begin(&sd); | 961 | splice_from_pipe_begin(&sd); |
| 1017 | do { | 962 | while (sd.total_len) { |
| 963 | struct iov_iter from; | ||
| 964 | struct kiocb kiocb; | ||
| 965 | size_t left; | ||
| 966 | int n, idx; | ||
| 967 | |||
| 1018 | ret = splice_from_pipe_next(pipe, &sd); | 968 | ret = splice_from_pipe_next(pipe, &sd); |
| 1019 | if (ret <= 0) | 969 | if (ret <= 0) |
| 1020 | break; | 970 | break; |
| 1021 | 971 | ||
| 1022 | mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD); | 972 | if (unlikely(nbufs < pipe->buffers)) { |
| 1023 | ret = file_remove_suid(out); | 973 | kfree(array); |
| 1024 | if (!ret) { | 974 | nbufs = pipe->buffers; |
| 1025 | ret = file_update_time(out); | 975 | array = kcalloc(nbufs, sizeof(struct bio_vec), |
| 1026 | if (!ret) | 976 | GFP_KERNEL); |
| 1027 | ret = splice_from_pipe_feed(pipe, &sd, | 977 | if (!array) { |
| 1028 | pipe_to_file); | 978 | ret = -ENOMEM; |
| 979 | break; | ||
| 980 | } | ||
| 1029 | } | 981 | } |
| 1030 | mutex_unlock(&inode->i_mutex); | 982 | |
| 1031 | } while (ret > 0); | 983 | /* build the vector */ |
| 984 | left = sd.total_len; | ||
| 985 | for (n = 0, idx = pipe->curbuf; left && n < pipe->nrbufs; n++, idx++) { | ||
| 986 | struct pipe_buffer *buf = pipe->bufs + idx; | ||
| 987 | size_t this_len = buf->len; | ||
| 988 | |||
| 989 | if (this_len > left) | ||
| 990 | this_len = left; | ||
| 991 | |||
| 992 | if (idx == pipe->buffers - 1) | ||
| 993 | idx = -1; | ||
| 994 | |||
| 995 | ret = buf->ops->confirm(pipe, buf); | ||
| 996 | if (unlikely(ret)) { | ||
| 997 | if (ret == -ENODATA) | ||
| 998 | ret = 0; | ||
| 999 | goto done; | ||
| 1000 | } | ||
| 1001 | |||
| 1002 | array[n].bv_page = buf->page; | ||
| 1003 | array[n].bv_len = this_len; | ||
| 1004 | array[n].bv_offset = buf->offset; | ||
| 1005 | left -= this_len; | ||
| 1006 | } | ||
| 1007 | |||
| 1008 | /* ... iov_iter */ | ||
| 1009 | from.type = ITER_BVEC | WRITE; | ||
| 1010 | from.bvec = array; | ||
| 1011 | from.nr_segs = n; | ||
| 1012 | from.count = sd.total_len - left; | ||
| 1013 | from.iov_offset = 0; | ||
| 1014 | |||
| 1015 | /* ... and iocb */ | ||
| 1016 | init_sync_kiocb(&kiocb, out); | ||
| 1017 | kiocb.ki_pos = sd.pos; | ||
| 1018 | kiocb.ki_nbytes = sd.total_len - left; | ||
| 1019 | |||
| 1020 | /* now, send it */ | ||
| 1021 | ret = out->f_op->write_iter(&kiocb, &from); | ||
| 1022 | if (-EIOCBQUEUED == ret) | ||
| 1023 | ret = wait_on_sync_kiocb(&kiocb); | ||
| 1024 | |||
| 1025 | if (ret <= 0) | ||
| 1026 | break; | ||
| 1027 | |||
| 1028 | sd.num_spliced += ret; | ||
| 1029 | sd.total_len -= ret; | ||
| 1030 | *ppos = sd.pos = kiocb.ki_pos; | ||
| 1031 | |||
| 1032 | /* dismiss the fully eaten buffers, adjust the partial one */ | ||
| 1033 | while (ret) { | ||
| 1034 | struct pipe_buffer *buf = pipe->bufs + pipe->curbuf; | ||
| 1035 | if (ret >= buf->len) { | ||
| 1036 | const struct pipe_buf_operations *ops = buf->ops; | ||
| 1037 | ret -= buf->len; | ||
| 1038 | buf->len = 0; | ||
| 1039 | buf->ops = NULL; | ||
| 1040 | ops->release(pipe, buf); | ||
| 1041 | pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1); | ||
| 1042 | pipe->nrbufs--; | ||
| 1043 | if (pipe->files) | ||
| 1044 | sd.need_wakeup = true; | ||
| 1045 | } else { | ||
| 1046 | buf->offset += ret; | ||
| 1047 | buf->len -= ret; | ||
| 1048 | ret = 0; | ||
| 1049 | } | ||
| 1050 | } | ||
| 1051 | } | ||
| 1052 | done: | ||
| 1053 | kfree(array); | ||
| 1032 | splice_from_pipe_end(pipe, &sd); | 1054 | splice_from_pipe_end(pipe, &sd); |
| 1033 | 1055 | ||
| 1034 | pipe_unlock(pipe); | 1056 | pipe_unlock(pipe); |
| @@ -1036,21 +1058,10 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, | |||
| 1036 | if (sd.num_spliced) | 1058 | if (sd.num_spliced) |
| 1037 | ret = sd.num_spliced; | 1059 | ret = sd.num_spliced; |
| 1038 | 1060 | ||
| 1039 | if (ret > 0) { | ||
| 1040 | int err; | ||
| 1041 | |||
| 1042 | err = generic_write_sync(out, *ppos, ret); | ||
| 1043 | if (err) | ||
| 1044 | ret = err; | ||
| 1045 | else | ||
| 1046 | *ppos += ret; | ||
| 1047 | balance_dirty_pages_ratelimited(mapping); | ||
| 1048 | } | ||
| 1049 | |||
| 1050 | return ret; | 1061 | return ret; |
| 1051 | } | 1062 | } |
| 1052 | 1063 | ||
| 1053 | EXPORT_SYMBOL(generic_file_splice_write); | 1064 | EXPORT_SYMBOL(iter_file_splice_write); |
| 1054 | 1065 | ||
| 1055 | static int write_pipe_buf(struct pipe_inode_info *pipe, struct pipe_buffer *buf, | 1066 | static int write_pipe_buf(struct pipe_inode_info *pipe, struct pipe_buffer *buf, |
| 1056 | struct splice_desc *sd) | 1067 | struct splice_desc *sd) |
| @@ -1549,7 +1560,7 @@ static long vmsplice_to_user(struct file *file, const struct iovec __user *uiov, | |||
| 1549 | goto out; | 1560 | goto out; |
| 1550 | 1561 | ||
| 1551 | count = ret; | 1562 | count = ret; |
| 1552 | iov_iter_init(&iter, iov, nr_segs, count, 0); | 1563 | iov_iter_init(&iter, READ, iov, nr_segs, count); |
| 1553 | 1564 | ||
| 1554 | sd.len = 0; | 1565 | sd.len = 0; |
| 1555 | sd.total_len = count; | 1566 | sd.total_len = count; |
diff --git a/fs/sysv/file.c b/fs/sysv/file.c index 9d4dc6831792..b00811c75b24 100644 --- a/fs/sysv/file.c +++ b/fs/sysv/file.c | |||
| @@ -21,10 +21,10 @@ | |||
| 21 | */ | 21 | */ |
| 22 | const struct file_operations sysv_file_operations = { | 22 | const struct file_operations sysv_file_operations = { |
| 23 | .llseek = generic_file_llseek, | 23 | .llseek = generic_file_llseek, |
| 24 | .read = do_sync_read, | 24 | .read = new_sync_read, |
| 25 | .aio_read = generic_file_aio_read, | 25 | .read_iter = generic_file_read_iter, |
| 26 | .write = do_sync_write, | 26 | .write = new_sync_write, |
| 27 | .aio_write = generic_file_aio_write, | 27 | .write_iter = generic_file_write_iter, |
| 28 | .mmap = generic_file_mmap, | 28 | .mmap = generic_file_mmap, |
| 29 | .fsync = generic_file_fsync, | 29 | .fsync = generic_file_fsync, |
| 30 | .splice_read = generic_file_splice_read, | 30 | .splice_read = generic_file_splice_read, |
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 0ab7f7dfb98b..b5b593c45270 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c | |||
| @@ -1364,17 +1364,17 @@ static inline int mctime_update_needed(const struct inode *inode, | |||
| 1364 | 1364 | ||
| 1365 | /** | 1365 | /** |
| 1366 | * update_ctime - update mtime and ctime of an inode. | 1366 | * update_ctime - update mtime and ctime of an inode. |
| 1367 | * @c: UBIFS file-system description object | ||
| 1368 | * @inode: inode to update | 1367 | * @inode: inode to update |
| 1369 | * | 1368 | * |
| 1370 | * This function updates mtime and ctime of the inode if it is not equivalent to | 1369 | * This function updates mtime and ctime of the inode if it is not equivalent to |
| 1371 | * current time. Returns zero in case of success and a negative error code in | 1370 | * current time. Returns zero in case of success and a negative error code in |
| 1372 | * case of failure. | 1371 | * case of failure. |
| 1373 | */ | 1372 | */ |
| 1374 | static int update_mctime(struct ubifs_info *c, struct inode *inode) | 1373 | static int update_mctime(struct inode *inode) |
| 1375 | { | 1374 | { |
| 1376 | struct timespec now = ubifs_current_time(inode); | 1375 | struct timespec now = ubifs_current_time(inode); |
| 1377 | struct ubifs_inode *ui = ubifs_inode(inode); | 1376 | struct ubifs_inode *ui = ubifs_inode(inode); |
| 1377 | struct ubifs_info *c = inode->i_sb->s_fs_info; | ||
| 1378 | 1378 | ||
| 1379 | if (mctime_update_needed(inode, &now)) { | 1379 | if (mctime_update_needed(inode, &now)) { |
| 1380 | int err, release; | 1380 | int err, release; |
| @@ -1397,18 +1397,13 @@ static int update_mctime(struct ubifs_info *c, struct inode *inode) | |||
| 1397 | return 0; | 1397 | return 0; |
| 1398 | } | 1398 | } |
| 1399 | 1399 | ||
| 1400 | static ssize_t ubifs_aio_write(struct kiocb *iocb, const struct iovec *iov, | 1400 | static ssize_t ubifs_write_iter(struct kiocb *iocb, struct iov_iter *from) |
| 1401 | unsigned long nr_segs, loff_t pos) | ||
| 1402 | { | 1401 | { |
| 1403 | int err; | 1402 | int err = update_mctime(file_inode(iocb->ki_filp)); |
| 1404 | struct inode *inode = iocb->ki_filp->f_mapping->host; | ||
| 1405 | struct ubifs_info *c = inode->i_sb->s_fs_info; | ||
| 1406 | |||
| 1407 | err = update_mctime(c, inode); | ||
| 1408 | if (err) | 1403 | if (err) |
| 1409 | return err; | 1404 | return err; |
| 1410 | 1405 | ||
| 1411 | return generic_file_aio_write(iocb, iov, nr_segs, pos); | 1406 | return generic_file_write_iter(iocb, from); |
| 1412 | } | 1407 | } |
| 1413 | 1408 | ||
| 1414 | static int ubifs_set_page_dirty(struct page *page) | 1409 | static int ubifs_set_page_dirty(struct page *page) |
| @@ -1582,15 +1577,15 @@ const struct inode_operations ubifs_symlink_inode_operations = { | |||
| 1582 | 1577 | ||
| 1583 | const struct file_operations ubifs_file_operations = { | 1578 | const struct file_operations ubifs_file_operations = { |
| 1584 | .llseek = generic_file_llseek, | 1579 | .llseek = generic_file_llseek, |
| 1585 | .read = do_sync_read, | 1580 | .read = new_sync_read, |
| 1586 | .write = do_sync_write, | 1581 | .write = new_sync_write, |
| 1587 | .aio_read = generic_file_aio_read, | 1582 | .read_iter = generic_file_read_iter, |
| 1588 | .aio_write = ubifs_aio_write, | 1583 | .write_iter = ubifs_write_iter, |
| 1589 | .mmap = ubifs_file_mmap, | 1584 | .mmap = ubifs_file_mmap, |
| 1590 | .fsync = ubifs_fsync, | 1585 | .fsync = ubifs_fsync, |
| 1591 | .unlocked_ioctl = ubifs_ioctl, | 1586 | .unlocked_ioctl = ubifs_ioctl, |
| 1592 | .splice_read = generic_file_splice_read, | 1587 | .splice_read = generic_file_splice_read, |
| 1593 | .splice_write = generic_file_splice_write, | 1588 | .splice_write = iter_file_splice_write, |
| 1594 | #ifdef CONFIG_COMPAT | 1589 | #ifdef CONFIG_COMPAT |
| 1595 | .compat_ioctl = ubifs_compat_ioctl, | 1590 | .compat_ioctl = ubifs_compat_ioctl, |
| 1596 | #endif | 1591 | #endif |
diff --git a/fs/udf/file.c b/fs/udf/file.c index d2c170f8b035..d80738fdf424 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c | |||
| @@ -119,8 +119,8 @@ static int udf_adinicb_write_end(struct file *file, | |||
| 119 | } | 119 | } |
| 120 | 120 | ||
| 121 | static ssize_t udf_adinicb_direct_IO(int rw, struct kiocb *iocb, | 121 | static ssize_t udf_adinicb_direct_IO(int rw, struct kiocb *iocb, |
| 122 | const struct iovec *iov, | 122 | struct iov_iter *iter, |
| 123 | loff_t offset, unsigned long nr_segs) | 123 | loff_t offset) |
| 124 | { | 124 | { |
| 125 | /* Fallback to buffered I/O. */ | 125 | /* Fallback to buffered I/O. */ |
| 126 | return 0; | 126 | return 0; |
| @@ -134,8 +134,7 @@ const struct address_space_operations udf_adinicb_aops = { | |||
| 134 | .direct_IO = udf_adinicb_direct_IO, | 134 | .direct_IO = udf_adinicb_direct_IO, |
| 135 | }; | 135 | }; |
| 136 | 136 | ||
| 137 | static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov, | 137 | static ssize_t udf_file_write_iter(struct kiocb *iocb, struct iov_iter *from) |
| 138 | unsigned long nr_segs, loff_t ppos) | ||
| 139 | { | 138 | { |
| 140 | ssize_t retval; | 139 | ssize_t retval; |
| 141 | struct file *file = iocb->ki_filp; | 140 | struct file *file = iocb->ki_filp; |
| @@ -150,7 +149,7 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
| 150 | if (file->f_flags & O_APPEND) | 149 | if (file->f_flags & O_APPEND) |
| 151 | pos = inode->i_size; | 150 | pos = inode->i_size; |
| 152 | else | 151 | else |
| 153 | pos = ppos; | 152 | pos = iocb->ki_pos; |
| 154 | 153 | ||
| 155 | if (inode->i_sb->s_blocksize < | 154 | if (inode->i_sb->s_blocksize < |
| 156 | (udf_file_entry_alloc_offset(inode) + | 155 | (udf_file_entry_alloc_offset(inode) + |
| @@ -171,7 +170,7 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
| 171 | } else | 170 | } else |
| 172 | up_write(&iinfo->i_data_sem); | 171 | up_write(&iinfo->i_data_sem); |
| 173 | 172 | ||
| 174 | retval = __generic_file_aio_write(iocb, iov, nr_segs); | 173 | retval = __generic_file_write_iter(iocb, from); |
| 175 | mutex_unlock(&inode->i_mutex); | 174 | mutex_unlock(&inode->i_mutex); |
| 176 | 175 | ||
| 177 | if (retval > 0) { | 176 | if (retval > 0) { |
| @@ -252,13 +251,13 @@ static int udf_release_file(struct inode *inode, struct file *filp) | |||
| 252 | } | 251 | } |
| 253 | 252 | ||
| 254 | const struct file_operations udf_file_operations = { | 253 | const struct file_operations udf_file_operations = { |
| 255 | .read = do_sync_read, | 254 | .read = new_sync_read, |
| 256 | .aio_read = generic_file_aio_read, | 255 | .read_iter = generic_file_read_iter, |
| 257 | .unlocked_ioctl = udf_ioctl, | 256 | .unlocked_ioctl = udf_ioctl, |
| 258 | .open = generic_file_open, | 257 | .open = generic_file_open, |
| 259 | .mmap = generic_file_mmap, | 258 | .mmap = generic_file_mmap, |
| 260 | .write = do_sync_write, | 259 | .write = new_sync_write, |
| 261 | .aio_write = udf_file_aio_write, | 260 | .write_iter = udf_file_write_iter, |
| 262 | .release = udf_release_file, | 261 | .release = udf_release_file, |
| 263 | .fsync = generic_file_fsync, | 262 | .fsync = generic_file_fsync, |
| 264 | .splice_read = generic_file_splice_read, | 263 | .splice_read = generic_file_splice_read, |
diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 5d643706212f..236cd48184c2 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c | |||
| @@ -217,18 +217,18 @@ static int udf_write_begin(struct file *file, struct address_space *mapping, | |||
| 217 | } | 217 | } |
| 218 | 218 | ||
| 219 | static ssize_t udf_direct_IO(int rw, struct kiocb *iocb, | 219 | static ssize_t udf_direct_IO(int rw, struct kiocb *iocb, |
| 220 | const struct iovec *iov, | 220 | struct iov_iter *iter, |
| 221 | loff_t offset, unsigned long nr_segs) | 221 | loff_t offset) |
| 222 | { | 222 | { |
| 223 | struct file *file = iocb->ki_filp; | 223 | struct file *file = iocb->ki_filp; |
| 224 | struct address_space *mapping = file->f_mapping; | 224 | struct address_space *mapping = file->f_mapping; |
| 225 | struct inode *inode = mapping->host; | 225 | struct inode *inode = mapping->host; |
| 226 | size_t count = iov_iter_count(iter); | ||
| 226 | ssize_t ret; | 227 | ssize_t ret; |
| 227 | 228 | ||
| 228 | ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, | 229 | ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, udf_get_block); |
| 229 | udf_get_block); | ||
| 230 | if (unlikely(ret < 0 && (rw & WRITE))) | 230 | if (unlikely(ret < 0 && (rw & WRITE))) |
| 231 | udf_write_failed(mapping, offset + iov_length(iov, nr_segs)); | 231 | udf_write_failed(mapping, offset + count); |
| 232 | return ret; | 232 | return ret; |
| 233 | } | 233 | } |
| 234 | 234 | ||
diff --git a/fs/ufs/file.c b/fs/ufs/file.c index 33afa20d4509..c84ec010a676 100644 --- a/fs/ufs/file.c +++ b/fs/ufs/file.c | |||
| @@ -35,10 +35,10 @@ | |||
| 35 | 35 | ||
| 36 | const struct file_operations ufs_file_operations = { | 36 | const struct file_operations ufs_file_operations = { |
| 37 | .llseek = generic_file_llseek, | 37 | .llseek = generic_file_llseek, |
| 38 | .read = do_sync_read, | 38 | .read = new_sync_read, |
| 39 | .aio_read = generic_file_aio_read, | 39 | .read_iter = generic_file_read_iter, |
| 40 | .write = do_sync_write, | 40 | .write = new_sync_write, |
| 41 | .aio_write = generic_file_aio_write, | 41 | .write_iter = generic_file_write_iter, |
| 42 | .mmap = generic_file_mmap, | 42 | .mmap = generic_file_mmap, |
| 43 | .open = generic_file_open, | 43 | .open = generic_file_open, |
| 44 | .fsync = generic_file_fsync, | 44 | .fsync = generic_file_fsync, |
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index e32640eedea6..faaf716e2080 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c | |||
| @@ -1486,9 +1486,8 @@ STATIC ssize_t | |||
| 1486 | xfs_vm_direct_IO( | 1486 | xfs_vm_direct_IO( |
| 1487 | int rw, | 1487 | int rw, |
| 1488 | struct kiocb *iocb, | 1488 | struct kiocb *iocb, |
| 1489 | const struct iovec *iov, | 1489 | struct iov_iter *iter, |
| 1490 | loff_t offset, | 1490 | loff_t offset) |
| 1491 | unsigned long nr_segs) | ||
| 1492 | { | 1491 | { |
| 1493 | struct inode *inode = iocb->ki_filp->f_mapping->host; | 1492 | struct inode *inode = iocb->ki_filp->f_mapping->host; |
| 1494 | struct block_device *bdev = xfs_find_bdev_for_inode(inode); | 1493 | struct block_device *bdev = xfs_find_bdev_for_inode(inode); |
| @@ -1496,7 +1495,7 @@ xfs_vm_direct_IO( | |||
| 1496 | ssize_t ret; | 1495 | ssize_t ret; |
| 1497 | 1496 | ||
| 1498 | if (rw & WRITE) { | 1497 | if (rw & WRITE) { |
| 1499 | size_t size = iov_length(iov, nr_segs); | 1498 | size_t size = iov_iter_count(iter); |
| 1500 | 1499 | ||
| 1501 | /* | 1500 | /* |
| 1502 | * We cannot preallocate a size update transaction here as we | 1501 | * We cannot preallocate a size update transaction here as we |
| @@ -1508,17 +1507,15 @@ xfs_vm_direct_IO( | |||
| 1508 | if (offset + size > XFS_I(inode)->i_d.di_size) | 1507 | if (offset + size > XFS_I(inode)->i_d.di_size) |
| 1509 | ioend->io_isdirect = 1; | 1508 | ioend->io_isdirect = 1; |
| 1510 | 1509 | ||
| 1511 | ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, | 1510 | ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iter, |
| 1512 | offset, nr_segs, | 1511 | offset, xfs_get_blocks_direct, |
| 1513 | xfs_get_blocks_direct, | ||
| 1514 | xfs_end_io_direct_write, NULL, | 1512 | xfs_end_io_direct_write, NULL, |
| 1515 | DIO_ASYNC_EXTEND); | 1513 | DIO_ASYNC_EXTEND); |
| 1516 | if (ret != -EIOCBQUEUED && iocb->private) | 1514 | if (ret != -EIOCBQUEUED && iocb->private) |
| 1517 | goto out_destroy_ioend; | 1515 | goto out_destroy_ioend; |
| 1518 | } else { | 1516 | } else { |
| 1519 | ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, | 1517 | ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iter, |
| 1520 | offset, nr_segs, | 1518 | offset, xfs_get_blocks_direct, |
| 1521 | xfs_get_blocks_direct, | ||
| 1522 | NULL, NULL, 0); | 1519 | NULL, NULL, 0); |
| 1523 | } | 1520 | } |
| 1524 | 1521 | ||
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 1b8160dc04d1..1f66779d7a46 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c | |||
| @@ -229,34 +229,27 @@ xfs_file_fsync( | |||
| 229 | } | 229 | } |
| 230 | 230 | ||
| 231 | STATIC ssize_t | 231 | STATIC ssize_t |
| 232 | xfs_file_aio_read( | 232 | xfs_file_read_iter( |
| 233 | struct kiocb *iocb, | 233 | struct kiocb *iocb, |
| 234 | const struct iovec *iovp, | 234 | struct iov_iter *to) |
| 235 | unsigned long nr_segs, | ||
| 236 | loff_t pos) | ||
| 237 | { | 235 | { |
| 238 | struct file *file = iocb->ki_filp; | 236 | struct file *file = iocb->ki_filp; |
| 239 | struct inode *inode = file->f_mapping->host; | 237 | struct inode *inode = file->f_mapping->host; |
| 240 | struct xfs_inode *ip = XFS_I(inode); | 238 | struct xfs_inode *ip = XFS_I(inode); |
| 241 | struct xfs_mount *mp = ip->i_mount; | 239 | struct xfs_mount *mp = ip->i_mount; |
| 242 | size_t size = 0; | 240 | size_t size = iov_iter_count(to); |
| 243 | ssize_t ret = 0; | 241 | ssize_t ret = 0; |
| 244 | int ioflags = 0; | 242 | int ioflags = 0; |
| 245 | xfs_fsize_t n; | 243 | xfs_fsize_t n; |
| 244 | loff_t pos = iocb->ki_pos; | ||
| 246 | 245 | ||
| 247 | XFS_STATS_INC(xs_read_calls); | 246 | XFS_STATS_INC(xs_read_calls); |
| 248 | 247 | ||
| 249 | BUG_ON(iocb->ki_pos != pos); | ||
| 250 | |||
| 251 | if (unlikely(file->f_flags & O_DIRECT)) | 248 | if (unlikely(file->f_flags & O_DIRECT)) |
| 252 | ioflags |= IO_ISDIRECT; | 249 | ioflags |= IO_ISDIRECT; |
| 253 | if (file->f_mode & FMODE_NOCMTIME) | 250 | if (file->f_mode & FMODE_NOCMTIME) |
| 254 | ioflags |= IO_INVIS; | 251 | ioflags |= IO_INVIS; |
| 255 | 252 | ||
| 256 | ret = generic_segment_checks(iovp, &nr_segs, &size, VERIFY_WRITE); | ||
| 257 | if (ret < 0) | ||
| 258 | return ret; | ||
| 259 | |||
| 260 | if (unlikely(ioflags & IO_ISDIRECT)) { | 253 | if (unlikely(ioflags & IO_ISDIRECT)) { |
| 261 | xfs_buftarg_t *target = | 254 | xfs_buftarg_t *target = |
| 262 | XFS_IS_REALTIME_INODE(ip) ? | 255 | XFS_IS_REALTIME_INODE(ip) ? |
| @@ -309,7 +302,7 @@ xfs_file_aio_read( | |||
| 309 | 302 | ||
| 310 | trace_xfs_file_read(ip, size, pos, ioflags); | 303 | trace_xfs_file_read(ip, size, pos, ioflags); |
| 311 | 304 | ||
| 312 | ret = generic_file_aio_read(iocb, iovp, nr_segs, pos); | 305 | ret = generic_file_read_iter(iocb, to); |
| 313 | if (ret > 0) | 306 | if (ret > 0) |
| 314 | XFS_STATS_ADD(xs_read_bytes, ret); | 307 | XFS_STATS_ADD(xs_read_bytes, ret); |
| 315 | 308 | ||
| @@ -350,47 +343,6 @@ xfs_file_splice_read( | |||
| 350 | } | 343 | } |
| 351 | 344 | ||
| 352 | /* | 345 | /* |
| 353 | * xfs_file_splice_write() does not use xfs_rw_ilock() because | ||
| 354 | * generic_file_splice_write() takes the i_mutex itself. This, in theory, | ||
| 355 | * couuld cause lock inversions between the aio_write path and the splice path | ||
| 356 | * if someone is doing concurrent splice(2) based writes and write(2) based | ||
| 357 | * writes to the same inode. The only real way to fix this is to re-implement | ||
| 358 | * the generic code here with correct locking orders. | ||
| 359 | */ | ||
| 360 | STATIC ssize_t | ||
| 361 | xfs_file_splice_write( | ||
| 362 | struct pipe_inode_info *pipe, | ||
| 363 | struct file *outfilp, | ||
| 364 | loff_t *ppos, | ||
| 365 | size_t count, | ||
| 366 | unsigned int flags) | ||
| 367 | { | ||
| 368 | struct inode *inode = outfilp->f_mapping->host; | ||
| 369 | struct xfs_inode *ip = XFS_I(inode); | ||
| 370 | int ioflags = 0; | ||
| 371 | ssize_t ret; | ||
| 372 | |||
| 373 | XFS_STATS_INC(xs_write_calls); | ||
| 374 | |||
| 375 | if (outfilp->f_mode & FMODE_NOCMTIME) | ||
| 376 | ioflags |= IO_INVIS; | ||
| 377 | |||
| 378 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) | ||
| 379 | return -EIO; | ||
| 380 | |||
| 381 | xfs_ilock(ip, XFS_IOLOCK_EXCL); | ||
| 382 | |||
| 383 | trace_xfs_file_splice_write(ip, count, *ppos, ioflags); | ||
| 384 | |||
| 385 | ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags); | ||
| 386 | if (ret > 0) | ||
| 387 | XFS_STATS_ADD(xs_write_bytes, ret); | ||
| 388 | |||
| 389 | xfs_iunlock(ip, XFS_IOLOCK_EXCL); | ||
| 390 | return ret; | ||
| 391 | } | ||
| 392 | |||
| 393 | /* | ||
| 394 | * This routine is called to handle zeroing any space in the last block of the | 346 | * This routine is called to handle zeroing any space in the last block of the |
| 395 | * file that is beyond the EOF. We do this since the size is being increased | 347 | * file that is beyond the EOF. We do this since the size is being increased |
| 396 | * without writing anything to that block and we don't want to read the | 348 | * without writing anything to that block and we don't want to read the |
| @@ -625,10 +577,7 @@ restart: | |||
| 625 | STATIC ssize_t | 577 | STATIC ssize_t |
| 626 | xfs_file_dio_aio_write( | 578 | xfs_file_dio_aio_write( |
| 627 | struct kiocb *iocb, | 579 | struct kiocb *iocb, |
| 628 | const struct iovec *iovp, | 580 | struct iov_iter *from) |
| 629 | unsigned long nr_segs, | ||
| 630 | loff_t pos, | ||
| 631 | size_t ocount) | ||
| 632 | { | 581 | { |
| 633 | struct file *file = iocb->ki_filp; | 582 | struct file *file = iocb->ki_filp; |
| 634 | struct address_space *mapping = file->f_mapping; | 583 | struct address_space *mapping = file->f_mapping; |
| @@ -636,9 +585,10 @@ xfs_file_dio_aio_write( | |||
| 636 | struct xfs_inode *ip = XFS_I(inode); | 585 | struct xfs_inode *ip = XFS_I(inode); |
| 637 | struct xfs_mount *mp = ip->i_mount; | 586 | struct xfs_mount *mp = ip->i_mount; |
| 638 | ssize_t ret = 0; | 587 | ssize_t ret = 0; |
| 639 | size_t count = ocount; | ||
| 640 | int unaligned_io = 0; | 588 | int unaligned_io = 0; |
| 641 | int iolock; | 589 | int iolock; |
| 590 | size_t count = iov_iter_count(from); | ||
| 591 | loff_t pos = iocb->ki_pos; | ||
| 642 | struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ? | 592 | struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ? |
| 643 | mp->m_rtdev_targp : mp->m_ddev_targp; | 593 | mp->m_rtdev_targp : mp->m_ddev_targp; |
| 644 | 594 | ||
| @@ -677,6 +627,7 @@ xfs_file_dio_aio_write( | |||
| 677 | ret = xfs_file_aio_write_checks(file, &pos, &count, &iolock); | 627 | ret = xfs_file_aio_write_checks(file, &pos, &count, &iolock); |
| 678 | if (ret) | 628 | if (ret) |
| 679 | goto out; | 629 | goto out; |
| 630 | iov_iter_truncate(from, count); | ||
| 680 | 631 | ||
| 681 | if (mapping->nrpages) { | 632 | if (mapping->nrpages) { |
| 682 | ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, | 633 | ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, |
| @@ -698,8 +649,7 @@ xfs_file_dio_aio_write( | |||
| 698 | } | 649 | } |
| 699 | 650 | ||
| 700 | trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0); | 651 | trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0); |
| 701 | ret = generic_file_direct_write(iocb, iovp, | 652 | ret = generic_file_direct_write(iocb, from, pos); |
| 702 | &nr_segs, pos, count, ocount); | ||
| 703 | 653 | ||
| 704 | out: | 654 | out: |
| 705 | xfs_rw_iunlock(ip, iolock); | 655 | xfs_rw_iunlock(ip, iolock); |
| @@ -712,10 +662,7 @@ out: | |||
| 712 | STATIC ssize_t | 662 | STATIC ssize_t |
| 713 | xfs_file_buffered_aio_write( | 663 | xfs_file_buffered_aio_write( |
| 714 | struct kiocb *iocb, | 664 | struct kiocb *iocb, |
| 715 | const struct iovec *iovp, | 665 | struct iov_iter *from) |
| 716 | unsigned long nr_segs, | ||
| 717 | loff_t pos, | ||
| 718 | size_t count) | ||
| 719 | { | 666 | { |
| 720 | struct file *file = iocb->ki_filp; | 667 | struct file *file = iocb->ki_filp; |
| 721 | struct address_space *mapping = file->f_mapping; | 668 | struct address_space *mapping = file->f_mapping; |
| @@ -724,7 +671,8 @@ xfs_file_buffered_aio_write( | |||
| 724 | ssize_t ret; | 671 | ssize_t ret; |
| 725 | int enospc = 0; | 672 | int enospc = 0; |
| 726 | int iolock = XFS_IOLOCK_EXCL; | 673 | int iolock = XFS_IOLOCK_EXCL; |
| 727 | struct iov_iter from; | 674 | loff_t pos = iocb->ki_pos; |
| 675 | size_t count = iov_iter_count(from); | ||
| 728 | 676 | ||
| 729 | xfs_rw_ilock(ip, iolock); | 677 | xfs_rw_ilock(ip, iolock); |
| 730 | 678 | ||
| @@ -732,13 +680,13 @@ xfs_file_buffered_aio_write( | |||
| 732 | if (ret) | 680 | if (ret) |
| 733 | goto out; | 681 | goto out; |
| 734 | 682 | ||
| 735 | iov_iter_init(&from, iovp, nr_segs, count, 0); | 683 | iov_iter_truncate(from, count); |
| 736 | /* We can write back this queue in page reclaim */ | 684 | /* We can write back this queue in page reclaim */ |
| 737 | current->backing_dev_info = mapping->backing_dev_info; | 685 | current->backing_dev_info = mapping->backing_dev_info; |
| 738 | 686 | ||
| 739 | write_retry: | 687 | write_retry: |
| 740 | trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, 0); | 688 | trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, 0); |
| 741 | ret = generic_perform_write(file, &from, pos); | 689 | ret = generic_perform_write(file, from, pos); |
| 742 | if (likely(ret >= 0)) | 690 | if (likely(ret >= 0)) |
| 743 | iocb->ki_pos = pos + ret; | 691 | iocb->ki_pos = pos + ret; |
| 744 | /* | 692 | /* |
| @@ -759,40 +707,29 @@ out: | |||
| 759 | } | 707 | } |
| 760 | 708 | ||
| 761 | STATIC ssize_t | 709 | STATIC ssize_t |
| 762 | xfs_file_aio_write( | 710 | xfs_file_write_iter( |
| 763 | struct kiocb *iocb, | 711 | struct kiocb *iocb, |
| 764 | const struct iovec *iovp, | 712 | struct iov_iter *from) |
| 765 | unsigned long nr_segs, | ||
| 766 | loff_t pos) | ||
| 767 | { | 713 | { |
| 768 | struct file *file = iocb->ki_filp; | 714 | struct file *file = iocb->ki_filp; |
| 769 | struct address_space *mapping = file->f_mapping; | 715 | struct address_space *mapping = file->f_mapping; |
| 770 | struct inode *inode = mapping->host; | 716 | struct inode *inode = mapping->host; |
| 771 | struct xfs_inode *ip = XFS_I(inode); | 717 | struct xfs_inode *ip = XFS_I(inode); |
| 772 | ssize_t ret; | 718 | ssize_t ret; |
| 773 | size_t ocount = 0; | 719 | size_t ocount = iov_iter_count(from); |
| 774 | 720 | ||
| 775 | XFS_STATS_INC(xs_write_calls); | 721 | XFS_STATS_INC(xs_write_calls); |
| 776 | 722 | ||
| 777 | BUG_ON(iocb->ki_pos != pos); | ||
| 778 | |||
| 779 | ret = generic_segment_checks(iovp, &nr_segs, &ocount, VERIFY_READ); | ||
| 780 | if (ret) | ||
| 781 | return ret; | ||
| 782 | |||
| 783 | if (ocount == 0) | 723 | if (ocount == 0) |
| 784 | return 0; | 724 | return 0; |
| 785 | 725 | ||
| 786 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { | 726 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) |
| 787 | ret = -EIO; | 727 | return -EIO; |
| 788 | goto out; | ||
| 789 | } | ||
| 790 | 728 | ||
| 791 | if (unlikely(file->f_flags & O_DIRECT)) | 729 | if (unlikely(file->f_flags & O_DIRECT)) |
| 792 | ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos, ocount); | 730 | ret = xfs_file_dio_aio_write(iocb, from); |
| 793 | else | 731 | else |
| 794 | ret = xfs_file_buffered_aio_write(iocb, iovp, nr_segs, pos, | 732 | ret = xfs_file_buffered_aio_write(iocb, from); |
| 795 | ocount); | ||
| 796 | 733 | ||
| 797 | if (ret > 0) { | 734 | if (ret > 0) { |
| 798 | ssize_t err; | 735 | ssize_t err; |
| @@ -804,8 +741,6 @@ xfs_file_aio_write( | |||
| 804 | if (err < 0) | 741 | if (err < 0) |
| 805 | ret = err; | 742 | ret = err; |
| 806 | } | 743 | } |
| 807 | |||
| 808 | out: | ||
| 809 | return ret; | 744 | return ret; |
| 810 | } | 745 | } |
| 811 | 746 | ||
| @@ -1461,12 +1396,12 @@ xfs_file_llseek( | |||
| 1461 | 1396 | ||
| 1462 | const struct file_operations xfs_file_operations = { | 1397 | const struct file_operations xfs_file_operations = { |
| 1463 | .llseek = xfs_file_llseek, | 1398 | .llseek = xfs_file_llseek, |
| 1464 | .read = do_sync_read, | 1399 | .read = new_sync_read, |
| 1465 | .write = do_sync_write, | 1400 | .write = new_sync_write, |
| 1466 | .aio_read = xfs_file_aio_read, | 1401 | .read_iter = xfs_file_read_iter, |
| 1467 | .aio_write = xfs_file_aio_write, | 1402 | .write_iter = xfs_file_write_iter, |
| 1468 | .splice_read = xfs_file_splice_read, | 1403 | .splice_read = xfs_file_splice_read, |
| 1469 | .splice_write = xfs_file_splice_write, | 1404 | .splice_write = iter_file_splice_write, |
| 1470 | .unlocked_ioctl = xfs_file_ioctl, | 1405 | .unlocked_ioctl = xfs_file_ioctl, |
| 1471 | #ifdef CONFIG_COMPAT | 1406 | #ifdef CONFIG_COMPAT |
| 1472 | .compat_ioctl = xfs_file_compat_ioctl, | 1407 | .compat_ioctl = xfs_file_compat_ioctl, |
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 6910458915cf..152f82782630 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h | |||
| @@ -1118,7 +1118,6 @@ DEFINE_RW_EVENT(xfs_file_read); | |||
| 1118 | DEFINE_RW_EVENT(xfs_file_buffered_write); | 1118 | DEFINE_RW_EVENT(xfs_file_buffered_write); |
| 1119 | DEFINE_RW_EVENT(xfs_file_direct_write); | 1119 | DEFINE_RW_EVENT(xfs_file_direct_write); |
| 1120 | DEFINE_RW_EVENT(xfs_file_splice_read); | 1120 | DEFINE_RW_EVENT(xfs_file_splice_read); |
| 1121 | DEFINE_RW_EVENT(xfs_file_splice_write); | ||
| 1122 | 1121 | ||
| 1123 | DECLARE_EVENT_CLASS(xfs_page_class, | 1122 | DECLARE_EVENT_CLASS(xfs_page_class, |
| 1124 | TP_PROTO(struct inode *inode, struct page *page, unsigned long off, | 1123 | TP_PROTO(struct inode *inode, struct page *page, unsigned long off, |
