Diffstat (limited to 'fs')
117 files changed, 2336 insertions, 1194 deletions
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index eb14e055ea83..ff1a5bac4200 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -33,7 +33,7 @@
 #include <linux/pagemap.h>
 #include <linux/idr.h>
 #include <linux/sched.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
 #include <net/9p/9p.h>
 #include <net/9p/client.h>
 
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index 270c48148f79..2d0cbbd14cfc 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -27,9 +27,6 @@ config COMPAT_BINFMT_ELF
         bool
         depends on COMPAT && BINFMT_ELF
 
-config ARCH_BINFMT_ELF_RANDOMIZE_PIE
-        bool
-
 config ARCH_BINFMT_ELF_STATE
         bool
 
diff --git a/fs/Makefile b/fs/Makefile
index a88ac4838c9e..cb92fd4c3172 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -118,6 +118,7 @@ obj-$(CONFIG_HOSTFS) += hostfs/
 obj-$(CONFIG_HPPFS) += hppfs/
 obj-$(CONFIG_CACHEFILES) += cachefiles/
 obj-$(CONFIG_DEBUG_FS) += debugfs/
+obj-$(CONFIG_TRACING) += tracefs/
 obj-$(CONFIG_OCFS2_FS) += ocfs2/
 obj-$(CONFIG_BTRFS_FS) += btrfs/
 obj-$(CONFIG_GFS2_FS) += gfs2/
diff --git a/fs/affs/file.c b/fs/affs/file.c
index d2468bf95669..3aa7eb66547e 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -12,7 +12,7 @@
  * affs regular file handling primitives
  */
 
-#include <linux/aio.h>
+#include <linux/uio.h>
 #include "affs.h"
 
 static struct buffer_head *affs_get_extblock_slow(struct inode *inode, u32 ext);
@@ -699,8 +699,10 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping,
         boff = tmp % bsize;
         if (boff) {
                 bh = affs_bread_ino(inode, bidx, 0);
-                if (IS_ERR(bh))
-                        return PTR_ERR(bh);
+                if (IS_ERR(bh)) {
+                        written = PTR_ERR(bh);
+                        goto err_first_bh;
+                }
                 tmp = min(bsize - boff, to - from);
                 BUG_ON(boff + tmp > bsize || tmp > bsize);
                 memcpy(AFFS_DATA(bh) + boff, data + from, tmp);
@@ -712,14 +714,16 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping,
                 bidx++;
         } else if (bidx) {
                 bh = affs_bread_ino(inode, bidx - 1, 0);
-                if (IS_ERR(bh))
-                        return PTR_ERR(bh);
+                if (IS_ERR(bh)) {
+                        written = PTR_ERR(bh);
+                        goto err_first_bh;
+                }
         }
         while (from + bsize <= to) {
                 prev_bh = bh;
                 bh = affs_getemptyblk_ino(inode, bidx);
                 if (IS_ERR(bh))
-                        goto out;
+                        goto err_bh;
                 memcpy(AFFS_DATA(bh), data + from, bsize);
                 if (buffer_new(bh)) {
                         AFFS_DATA_HEAD(bh)->ptype = cpu_to_be32(T_DATA);
@@ -751,7 +755,7 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping,
                 prev_bh = bh;
                 bh = affs_bread_ino(inode, bidx, 1);
                 if (IS_ERR(bh))
-                        goto out;
+                        goto err_bh;
                 tmp = min(bsize, to - from);
                 BUG_ON(tmp > bsize);
                 memcpy(AFFS_DATA(bh), data + from, tmp);
@@ -790,12 +794,13 @@ done:
         if (tmp > inode->i_size)
                 inode->i_size = AFFS_I(inode)->mmu_private = tmp;
 
+err_first_bh:
         unlock_page(page);
         page_cache_release(page);
 
         return written;
 
-out:
+err_bh:
         bh = prev_bh;
         if (!written)
                 written = PTR_ERR(bh);
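
The affs hunks above stop returning directly from the middle of affs_write_end_ofs() and instead record the error in `written` and jump to labels that unlock and release the page on every exit path. Below is a small self-contained C sketch of that unwind pattern; the resource names and the -5 error value are invented for illustration and this is not the affs code.

#include <stdio.h>
#include <stdlib.h>

/* Illustrative stand-ins for the page and buffer-head resources. */
struct page { int locked; };
struct buf  { int valid; };

static struct buf *get_buf(int ok) { return ok ? malloc(sizeof(struct buf)) : NULL; }

static long write_chunk(struct page *pg, int first_ok, int second_ok)
{
        long written = 0;
        struct buf *bh;

        pg->locked = 1;                  /* resource taken before any failure point */

        bh = get_buf(first_ok);
        if (!bh) {
                written = -5;            /* record the error ... */
                goto err_first_bh;       /* ... but still unlock the page */
        }
        written += 10;
        free(bh);

        bh = get_buf(second_ok);
        if (!bh) {
                if (!written)            /* keep partial progress if there was any */
                        written = -5;
                goto err_first_bh;
        }
        written += 10;
        free(bh);

err_first_bh:                            /* single exit path releases the page */
        pg->locked = 0;
        return written;
}

int main(void)
{
        struct page pg = { 0 };
        printf("returned %ld\n", write_chunk(&pg, 1, 0));  /* partial write */
        printf("page still locked? %d\n", pg.locked);      /* 0: always released */
        return 0;
}
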
diff --git a/fs/afs/write.c b/fs/afs/write.c
index c13cb08964ed..0714abcd7f32 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -14,7 +14,6 @@
 #include <linux/pagemap.h>
 #include <linux/writeback.h>
 #include <linux/pagevec.h>
-#include <linux/aio.h>
 #include "internal.h"
 
 static int afs_write_back_from_locked_page(struct afs_writeback *wb,
diff --git a/fs/aio.c b/fs/aio.c
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -151,6 +151,38 @@ struct kioctx {
         unsigned                id;
 };
 
+/*
+ * We use ki_cancel == KIOCB_CANCELLED to indicate that a kiocb has been either
+ * cancelled or completed (this makes a certain amount of sense because
+ * successful cancellation - io_cancel() - does deliver the completion to
+ * userspace).
+ *
+ * And since most things don't implement kiocb cancellation and we'd really like
+ * kiocb completion to be lockless when possible, we use ki_cancel to
+ * synchronize cancellation and completion - we only set it to KIOCB_CANCELLED
+ * with xchg() or cmpxchg(), see batch_complete_aio() and kiocb_cancel().
+ */
+#define KIOCB_CANCELLED         ((void *) (~0ULL))
+
+struct aio_kiocb {
+        struct kiocb            common;
+
+        struct kioctx           *ki_ctx;
+        kiocb_cancel_fn         *ki_cancel;
+
+        struct iocb __user      *ki_user_iocb;  /* user's aiocb */
+        __u64                   ki_user_data;   /* user's data for completion */
+
+        struct list_head        ki_list;        /* the aio core uses this
+                                                 * for cancellation */
+
+        /*
+         * If the aio_resfd field of the userspace iocb is not zero,
+         * this is the underlying eventfd context to deliver events to.
+         */
+        struct eventfd_ctx      *ki_eventfd;
+};
+
 /*------ sysctl variables----*/
 static DEFINE_SPINLOCK(aio_nr_lock);
 unsigned long aio_nr;           /* current system wide number of aio requests */
@@ -220,7 +252,7 @@ static int __init aio_setup(void)
         if (IS_ERR(aio_mnt))
                 panic("Failed to create aio fs mount.");
 
-        kiocb_cachep = KMEM_CACHE(kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
+        kiocb_cachep = KMEM_CACHE(aio_kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
         kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);
 
         pr_debug("sizeof(struct page) = %zu\n", sizeof(struct page));
@@ -278,11 +310,11 @@ static int aio_ring_mmap(struct file *file, struct vm_area_struct *vma)
         return 0;
 }
 
-static void aio_ring_remap(struct file *file, struct vm_area_struct *vma)
+static int aio_ring_remap(struct file *file, struct vm_area_struct *vma)
 {
         struct mm_struct *mm = vma->vm_mm;
         struct kioctx_table *table;
-        int i;
+        int i, res = -EINVAL;
 
         spin_lock(&mm->ioctx_lock);
         rcu_read_lock();
@@ -292,13 +324,17 @@ static void aio_ring_remap(struct file *file, struct vm_area_struct *vma)
 
                 ctx = table->table[i];
                 if (ctx && ctx->aio_ring_file == file) {
-                        ctx->user_id = ctx->mmap_base = vma->vm_start;
+                        if (!atomic_read(&ctx->dead)) {
+                                ctx->user_id = ctx->mmap_base = vma->vm_start;
+                                res = 0;
+                        }
                         break;
                 }
         }
 
         rcu_read_unlock();
         spin_unlock(&mm->ioctx_lock);
+        return res;
 }
 
 static const struct file_operations aio_ring_fops = {
@@ -480,8 +516,9 @@ static int aio_setup_ring(struct kioctx *ctx)
 #define AIO_EVENTS_FIRST_PAGE   ((PAGE_SIZE - sizeof(struct aio_ring)) / sizeof(struct io_event))
 #define AIO_EVENTS_OFFSET       (AIO_EVENTS_PER_PAGE - AIO_EVENTS_FIRST_PAGE)
 
-void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel)
+void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel)
 {
+        struct aio_kiocb *req = container_of(iocb, struct aio_kiocb, common);
         struct kioctx *ctx = req->ki_ctx;
         unsigned long flags;
 
@@ -496,7 +533,7 @@ void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel)
 }
 EXPORT_SYMBOL(kiocb_set_cancel_fn);
 
-static int kiocb_cancel(struct kiocb *kiocb)
+static int kiocb_cancel(struct aio_kiocb *kiocb)
 {
         kiocb_cancel_fn *old, *cancel;
 
@@ -514,7 +551,7 @@ static int kiocb_cancel(struct kiocb *kiocb)
                 cancel = cmpxchg(&kiocb->ki_cancel, old, KIOCB_CANCELLED);
         } while (cancel != old);
 
-        return cancel(kiocb);
+        return cancel(&kiocb->common);
 }
 
 static void free_ioctx(struct work_struct *work)
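
The KIOCB_CANCELLED comment and kiocb_cancel() above rely on claiming ki_cancel with xchg()/cmpxchg() so that completion and cancellation agree on a single winner. The standalone sketch below illustrates that claim-by-compare-and-swap idea with C11 atomics; it uses a sentinel function instead of the kernel's ((void *)~0ULL) cast and is only an approximation of the scheme, not the kernel code.

#include <stdatomic.h>
#include <stdio.h>

typedef int (*cancel_fn)(void *);

static int cancelled_marker(void *req) { (void)req; return -1; }
#define CANCELLED cancelled_marker              /* stand-in for KIOCB_CANCELLED */

static _Atomic(cancel_fn) ki_cancel;

static int my_cancel(void *req) { (void)req; return 0; }

/* Completion side: unconditionally claim the slot with an exchange. */
static int completion_wins(void)
{
        return atomic_exchange(&ki_cancel, CANCELLED) != CANCELLED;
}

/* Cancellation side: loop until CANCELLED is installed or already present. */
static int try_cancel(void *req)
{
        cancel_fn old = atomic_load(&ki_cancel);

        do {
                if (!old || old == CANCELLED)
                        return -1;              /* nothing to cancel, or already done */
        } while (!atomic_compare_exchange_weak(&ki_cancel, &old, CANCELLED));

        return old(req);                        /* we won the race: run the callback */
}

int main(void)
{
        atomic_store(&ki_cancel, my_cancel);
        printf("cancel returned %d\n", try_cancel(NULL));
        printf("completion wins afterwards? %d\n", completion_wins());
        return 0;
}
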
@@ -550,13 +587,13 @@ static void free_ioctx_reqs(struct percpu_ref *ref)
 static void free_ioctx_users(struct percpu_ref *ref)
 {
         struct kioctx *ctx = container_of(ref, struct kioctx, users);
-        struct kiocb *req;
+        struct aio_kiocb *req;
 
         spin_lock_irq(&ctx->ctx_lock);
 
         while (!list_empty(&ctx->active_reqs)) {
                 req = list_first_entry(&ctx->active_reqs,
-                                       struct kiocb, ki_list);
+                                       struct aio_kiocb, ki_list);
 
                 list_del_init(&req->ki_list);
                 kiocb_cancel(req);
@@ -727,6 +764,9 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
 err_cleanup:
         aio_nr_sub(ctx->max_reqs);
 err_ctx:
+        atomic_set(&ctx->dead, 1);
+        if (ctx->mmap_size)
+                vm_munmap(ctx->mmap_base, ctx->mmap_size);
         aio_free_ring(ctx);
 err:
         mutex_unlock(&ctx->ring_lock);
@@ -748,11 +788,12 @@ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
 {
         struct kioctx_table *table;
 
-        if (atomic_xchg(&ctx->dead, 1))
+        spin_lock(&mm->ioctx_lock);
+        if (atomic_xchg(&ctx->dead, 1)) {
+                spin_unlock(&mm->ioctx_lock);
                 return -EINVAL;
+        }
 
-
-        spin_lock(&mm->ioctx_lock);
         table = rcu_dereference_raw(mm->ioctx_table);
         WARN_ON(ctx != table->table[ctx->id]);
         table->table[ctx->id] = NULL;
@@ -778,22 +819,6 @@ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
         return 0;
 }
 
-/* wait_on_sync_kiocb:
- *      Waits on the given sync kiocb to complete.
- */
-ssize_t wait_on_sync_kiocb(struct kiocb *req)
-{
-        while (!req->ki_ctx) {
-                set_current_state(TASK_UNINTERRUPTIBLE);
-                if (req->ki_ctx)
-                        break;
-                io_schedule();
-        }
-        __set_current_state(TASK_RUNNING);
-        return req->ki_user_data;
-}
-EXPORT_SYMBOL(wait_on_sync_kiocb);
-
 /*
  * exit_aio: called when the last user of mm goes away. At this point, there is
  * no way for any new requests to be submited or any of the io_* syscalls to be
@@ -948,9 +973,9 @@ static void user_refill_reqs_available(struct kioctx *ctx)
  *      Allocate a slot for an aio request.
  * Returns NULL if no requests are free.
  */
-static inline struct kiocb *aio_get_req(struct kioctx *ctx)
+static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx)
 {
-        struct kiocb *req;
+        struct aio_kiocb *req;
 
         if (!get_reqs_available(ctx)) {
                 user_refill_reqs_available(ctx);
@@ -971,10 +996,10 @@ out_put:
         return NULL;
 }
 
-static void kiocb_free(struct kiocb *req)
+static void kiocb_free(struct aio_kiocb *req)
 {
-        if (req->ki_filp)
-                fput(req->ki_filp);
+        if (req->common.ki_filp)
+                fput(req->common.ki_filp);
         if (req->ki_eventfd != NULL)
                 eventfd_ctx_put(req->ki_eventfd);
         kmem_cache_free(kiocb_cachep, req);
@@ -1010,8 +1035,9 @@ out:
 /* aio_complete
  *      Called when the io request on the given iocb is complete.
  */
-void aio_complete(struct kiocb *iocb, long res, long res2)
+static void aio_complete(struct kiocb *kiocb, long res, long res2)
 {
+        struct aio_kiocb *iocb = container_of(kiocb, struct aio_kiocb, common);
         struct kioctx *ctx = iocb->ki_ctx;
         struct aio_ring *ring;
         struct io_event *ev_page, *event;
@@ -1025,13 +1051,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
          *    ref, no other paths have a way to get another ref
          *  - the sync task helpfully left a reference to itself in the iocb
          */
-        if (is_sync_kiocb(iocb)) {
-                iocb->ki_user_data = res;
-                smp_wmb();
-                iocb->ki_ctx = ERR_PTR(-EXDEV);
-                wake_up_process(iocb->ki_obj.tsk);
-                return;
-        }
+        BUG_ON(is_sync_kiocb(kiocb));
 
         if (iocb->ki_list.next) {
                 unsigned long flags;
@@ -1057,7 +1077,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
         ev_page = kmap_atomic(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
         event = ev_page + pos % AIO_EVENTS_PER_PAGE;
 
-        event->obj = (u64)(unsigned long)iocb->ki_obj.user;
+        event->obj = (u64)(unsigned long)iocb->ki_user_iocb;
         event->data = iocb->ki_user_data;
         event->res = res;
         event->res2 = res2;
@@ -1066,7 +1086,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
         flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
 
         pr_debug("%p[%u]: %p: %p %Lx %lx %lx\n",
-                 ctx, tail, iocb, iocb->ki_obj.user, iocb->ki_user_data,
+                 ctx, tail, iocb, iocb->ki_user_iocb, iocb->ki_user_data,
                  res, res2);
 
         /* after flagging the request as done, we
@@ -1113,7 +1133,6 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
 
         percpu_ref_put(&ctx->reqs);
 }
-EXPORT_SYMBOL(aio_complete);
 
 /* aio_read_events_ring
  *      Pull an event off of the ioctx's event ring.  Returns the number of
@@ -1341,46 +1360,19 @@ typedef ssize_t (aio_rw_op)(struct kiocb *, const struct iovec *,
                             unsigned long, loff_t);
 typedef ssize_t (rw_iter_op)(struct kiocb *, struct iov_iter *);
 
-static ssize_t aio_setup_vectored_rw(struct kiocb *kiocb,
-                                     int rw, char __user *buf,
-                                     unsigned long *nr_segs,
-                                     struct iovec **iovec,
-                                     bool compat)
+static int aio_setup_vectored_rw(int rw, char __user *buf, size_t len,
+                                 struct iovec **iovec,
+                                 bool compat,
+                                 struct iov_iter *iter)
 {
-        ssize_t ret;
-
-        *nr_segs = kiocb->ki_nbytes;
-
 #ifdef CONFIG_COMPAT
         if (compat)
-                ret = compat_rw_copy_check_uvector(rw,
+                return compat_import_iovec(rw,
                                 (struct compat_iovec __user *)buf,
-                                *nr_segs, UIO_FASTIOV, *iovec, iovec);
-        else
+                                len, UIO_FASTIOV, iovec, iter);
 #endif
-        ret = rw_copy_check_uvector(rw,
-                        (struct iovec __user *)buf,
-                        *nr_segs, UIO_FASTIOV, *iovec, iovec);
-        if (ret < 0)
-                return ret;
-
-        /* ki_nbytes now reflect bytes instead of segs */
-        kiocb->ki_nbytes = ret;
-        return 0;
-}
-
-static ssize_t aio_setup_single_vector(struct kiocb *kiocb,
-                                       int rw, char __user *buf,
-                                       unsigned long *nr_segs,
-                                       struct iovec *iovec)
-{
-        if (unlikely(!access_ok(!rw, buf, kiocb->ki_nbytes)))
-                return -EFAULT;
-
-        iovec->iov_base = buf;
-        iovec->iov_len = kiocb->ki_nbytes;
-        *nr_segs = 1;
-        return 0;
+        return import_iovec(rw, (struct iovec __user *)buf,
+                        len, UIO_FASTIOV, iovec, iter);
 }
 
 /*
@@ -1388,11 +1380,10 @@ static ssize_t aio_setup_single_vector(struct kiocb *kiocb,
  *      Performs the initial checks and io submission.
  */
 static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode,
-                            char __user *buf, bool compat)
+                            char __user *buf, size_t len, bool compat)
 {
         struct file *file = req->ki_filp;
         ssize_t ret;
-        unsigned long nr_segs;
         int rw;
         fmode_t mode;
         aio_rw_op *rw_op;
@@ -1423,21 +1414,22 @@ rw_common:
                 if (!rw_op && !iter_op)
                         return -EINVAL;
 
-                ret = (opcode == IOCB_CMD_PREADV ||
-                       opcode == IOCB_CMD_PWRITEV)
-                        ? aio_setup_vectored_rw(req, rw, buf, &nr_segs,
-                                                &iovec, compat)
-                        : aio_setup_single_vector(req, rw, buf, &nr_segs,
-                                                  iovec);
+                if (opcode == IOCB_CMD_PREADV || opcode == IOCB_CMD_PWRITEV)
+                        ret = aio_setup_vectored_rw(rw, buf, len,
+                                                &iovec, compat, &iter);
+                else {
+                        ret = import_single_range(rw, buf, len, iovec, &iter);
+                        iovec = NULL;
+                }
                 if (!ret)
-                        ret = rw_verify_area(rw, file, &req->ki_pos, req->ki_nbytes);
+                        ret = rw_verify_area(rw, file, &req->ki_pos,
+                                             iov_iter_count(&iter));
                 if (ret < 0) {
-                        if (iovec != inline_vecs)
-                                kfree(iovec);
+                        kfree(iovec);
                         return ret;
                 }
 
-                req->ki_nbytes = ret;
+                len = ret;
 
                 /* XXX: move/kill - rw_verify_area()? */
                 /* This matches the pread()/pwrite() logic */
@@ -1450,14 +1442,14 @@ rw_common:
                         file_start_write(file);
 
                 if (iter_op) {
-                        iov_iter_init(&iter, rw, iovec, nr_segs, req->ki_nbytes);
                         ret = iter_op(req, &iter);
                 } else {
-                        ret = rw_op(req, iovec, nr_segs, req->ki_pos);
+                        ret = rw_op(req, iter.iov, iter.nr_segs, req->ki_pos);
                 }
 
                 if (rw == WRITE)
                         file_end_write(file);
+                kfree(iovec);
                 break;
 
         case IOCB_CMD_FDSYNC:
@@ -1479,9 +1471,6 @@ rw_common:
                 return -EINVAL;
         }
 
-        if (iovec != inline_vecs)
-                kfree(iovec);
-
         if (ret != -EIOCBQUEUED) {
                 /*
                  * There's no easy way to restart the syscall since other AIO's
@@ -1500,7 +1489,7 @@ rw_common:
 static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
                          struct iocb *iocb, bool compat)
 {
-        struct kiocb *req;
+        struct aio_kiocb *req;
         ssize_t ret;
 
         /* enforce forwards compatibility on users */
@@ -1523,11 +1512,14 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
         if (unlikely(!req))
                 return -EAGAIN;
 
-        req->ki_filp = fget(iocb->aio_fildes);
-        if (unlikely(!req->ki_filp)) {
+        req->common.ki_filp = fget(iocb->aio_fildes);
+        if (unlikely(!req->common.ki_filp)) {
                 ret = -EBADF;
                 goto out_put_req;
         }
+        req->common.ki_pos = iocb->aio_offset;
+        req->common.ki_complete = aio_complete;
+        req->common.ki_flags = 0;
 
         if (iocb->aio_flags & IOCB_FLAG_RESFD) {
                 /*
@@ -1542,6 +1534,8 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
                         req->ki_eventfd = NULL;
                         goto out_put_req;
                 }
+
+                req->common.ki_flags |= IOCB_EVENTFD;
         }
 
         ret = put_user(KIOCB_KEY, &user_iocb->aio_key);
@@ -1550,13 +1544,12 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
                 goto out_put_req;
         }
 
-        req->ki_obj.user = user_iocb;
+        req->ki_user_iocb = user_iocb;
         req->ki_user_data = iocb->aio_data;
-        req->ki_pos = iocb->aio_offset;
-        req->ki_nbytes = iocb->aio_nbytes;
 
-        ret = aio_run_iocb(req, iocb->aio_lio_opcode,
+        ret = aio_run_iocb(&req->common, iocb->aio_lio_opcode,
                            (char __user *)(unsigned long)iocb->aio_buf,
+                           iocb->aio_nbytes,
                            compat);
         if (ret)
                 goto out_put_req;
@@ -1643,10 +1636,10 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
 /* lookup_kiocb
  *      Finds a given iocb for cancellation.
  */
-static struct kiocb *lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb,
-                                  u32 key)
+static struct aio_kiocb *
+lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb, u32 key)
 {
-        struct list_head *pos;
+        struct aio_kiocb *kiocb;
 
         assert_spin_locked(&ctx->ctx_lock);
 
@@ -1654,9 +1647,8 @@ static struct kiocb *lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb,
                 return NULL;
 
         /* TODO: use a hash or array, this sucks. */
-        list_for_each(pos, &ctx->active_reqs) {
-                struct kiocb *kiocb = list_kiocb(pos);
-                if (kiocb->ki_obj.user == iocb)
+        list_for_each_entry(kiocb, &ctx->active_reqs, ki_list) {
+                if (kiocb->ki_user_iocb == iocb)
                         return kiocb;
         }
         return NULL;
@@ -1676,7 +1668,7 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
                 struct io_event __user *, result)
 {
         struct kioctx *ctx;
-        struct kiocb *kiocb;
+        struct aio_kiocb *kiocb;
         u32 key;
         int ret;
 
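
Most of the fs/aio.c churn above comes from embedding the generic struct kiocb inside a private struct aio_kiocb and recovering the wrapper with container_of() in aio_complete() and kiocb_set_cancel_fn(). Below is a minimal userspace illustration of that embedding pattern; the structures are stripped down and the completion callback is invented for the example, so this is a sketch and not the kernel implementation.

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct kiocb {                   /* the part generic code knows about */
        long ki_pos;
        void (*ki_complete)(struct kiocb *, long);
};

struct aio_kiocb {               /* private wrapper owned by the aio core */
        struct kiocb common;
        unsigned long ki_user_data;
};

static void aio_complete_sketch(struct kiocb *kiocb, long res)
{
        /* Generic code hands back only &req->common; recover the wrapper. */
        struct aio_kiocb *iocb = container_of(kiocb, struct aio_kiocb, common);

        printf("completed user_data=%lu res=%ld\n", iocb->ki_user_data, res);
}

int main(void)
{
        struct aio_kiocb req = {
                .common = { .ki_pos = 0, .ki_complete = aio_complete_sketch },
                .ki_user_data = 42,
        };

        /* A driver-like caller only ever sees the embedded struct kiocb. */
        req.common.ki_complete(&req.common, 128);
        return 0;
}
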
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 90bc079d9982..fdcb4d69f430 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -15,6 +15,7 @@
 #include <linux/buffer_head.h>
 #include <linux/vfs.h>
 #include <linux/writeback.h>
+#include <linux/uio.h>
 #include <asm/uaccess.h>
 #include "bfs.h"
 
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 995986b8e36b..241ef68d2893 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -31,6 +31,7 @@
 #include <linux/security.h>
 #include <linux/random.h>
 #include <linux/elf.h>
+#include <linux/elf-randomize.h>
 #include <linux/utsname.h>
 #include <linux/coredump.h>
 #include <linux/sched.h>
@@ -862,6 +863,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
              i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
                 int elf_prot = 0, elf_flags;
                 unsigned long k, vaddr;
+                unsigned long total_size = 0;
 
                 if (elf_ppnt->p_type != PT_LOAD)
                         continue;
@@ -909,25 +911,20 @@ static int load_elf_binary(struct linux_binprm *bprm)
                          * default mmap base, as well as whatever program they
                          * might try to exec. This is because the brk will
                          * follow the loader, and is not movable. */
-#ifdef CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE
-                        /* Memory randomization might have been switched off
-                         * in runtime via sysctl or explicit setting of
-                         * personality flags.
-                         * If that is the case, retain the original non-zero
-                         * load_bias value in order to establish proper
-                         * non-randomized mappings.
-                         */
+                        load_bias = ELF_ET_DYN_BASE - vaddr;
                         if (current->flags & PF_RANDOMIZE)
-                                load_bias = 0;
-                        else
-                                load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
-#else
-                        load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
-#endif
+                                load_bias += arch_mmap_rnd();
+                        load_bias = ELF_PAGESTART(load_bias);
+                        total_size = total_mapping_size(elf_phdata,
+                                                        loc->elf_ex.e_phnum);
+                        if (!total_size) {
+                                error = -EINVAL;
+                                goto out_free_dentry;
+                        }
                 }
 
                 error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
-                                elf_prot, elf_flags, 0);
+                                elf_prot, elf_flags, total_size);
                 if (BAD_ADDR(error)) {
                         retval = IS_ERR((void *)error) ?
                                 PTR_ERR((void*)error) : -EINVAL;
@@ -1053,15 +1050,13 @@ static int load_elf_binary(struct linux_binprm *bprm)
         current->mm->end_data = end_data;
         current->mm->start_stack = bprm->p;
 
-#ifdef arch_randomize_brk
         if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
                 current->mm->brk = current->mm->start_brk =
                         arch_randomize_brk(current->mm);
-#ifdef CONFIG_COMPAT_BRK
+#ifdef compat_brk_randomized
                 current->brk_randomized = 1;
 #endif
         }
-#endif
 
         if (current->personality & MMAP_PAGE_ZERO) {
                 /* Why this, you ask??? Well SVr4 maps page 0 as read-only,
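
With ARCH_BINFMT_ELF_RANDOMIZE_PIE gone, the ET_DYN path above always starts from ELF_ET_DYN_BASE minus the first vaddr, adds arch_mmap_rnd() when PF_RANDOMIZE is set, and then truncates to a page boundary. The arithmetic is sketched below with made-up constants and a fixed stand-in for the random offset; the real values are architecture-specific and not taken from this diff.

#include <stdio.h>

#define PAGE_SIZE        4096UL
#define ELF_PAGESTART(a) ((a) & ~(PAGE_SIZE - 1))   /* round down to a page */
#define ELF_ET_DYN_BASE  0x400000UL                  /* hypothetical base */

int main(void)
{
        unsigned long vaddr = 0x1234UL;      /* first PT_LOAD p_vaddr (example) */
        unsigned long rnd   = 0x3f000UL;     /* stand-in for arch_mmap_rnd() */
        int randomize = 1;

        unsigned long load_bias = ELF_ET_DYN_BASE - vaddr;
        if (randomize)
                load_bias += rnd;
        load_bias = ELF_PAGESTART(load_bias);

        printf("load_bias = %#lx, first mapping at %#lx\n",
               load_bias, load_bias + vaddr);
        return 0;
}
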
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 975266be67d3..2e522aed6584 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -27,7 +27,6 @@
 #include <linux/namei.h>
 #include <linux/log2.h>
 #include <linux/cleancache.h>
-#include <linux/aio.h>
 #include <asm/uaccess.h>
 #include "internal.h"
 
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 84c3b00f3de8..f9c89cae39ee 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3387,6 +3387,8 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
 
 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
                                    struct btrfs_root *root);
+int btrfs_setup_space_cache(struct btrfs_trans_handle *trans,
+                            struct btrfs_root *root);
 int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr);
 int btrfs_free_block_groups(struct btrfs_fs_info *info);
 int btrfs_read_block_groups(struct btrfs_root *root);
@@ -3909,6 +3911,9 @@ int btrfs_prealloc_file_range_trans(struct inode *inode,
                                     loff_t actual_len, u64 *alloc_hint);
 int btrfs_inode_check_errors(struct inode *inode);
 extern const struct dentry_operations btrfs_dentry_operations;
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+void btrfs_test_inode_set_ops(struct inode *inode);
+#endif
 
 /* ioctl.c */
 long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index f79f38542a73..639f2663ed3f 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3921,7 +3921,7 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
         }
         if (btrfs_super_sys_array_size(sb) < sizeof(struct btrfs_disk_key)
                         + sizeof(struct btrfs_chunk)) {
-                printk(KERN_ERR "BTRFS: system chunk array too small %u < %lu\n",
+                printk(KERN_ERR "BTRFS: system chunk array too small %u < %zu\n",
                        btrfs_super_sys_array_size(sb),
                        sizeof(struct btrfs_disk_key)
                        + sizeof(struct btrfs_chunk));
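
The one-character format fix above exists because sizeof() evaluates to size_t, which is not always the same width as unsigned long, so %zu is the portable specifier. A trivial illustration (the struct sizes here are invented):

#include <stdio.h>

struct key   { char data[17]; };
struct chunk { char data[48]; };

int main(void)
{
        /* sizeof yields size_t, so %zu is the portable format specifier. */
        printf("need at least %zu bytes\n",
               sizeof(struct key) + sizeof(struct chunk));
        return 0;
}
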
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 6f080451fcb1..8b353ad02f03 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3325,6 +3325,32 @@ out:
         return ret;
 }
 
+int btrfs_setup_space_cache(struct btrfs_trans_handle *trans,
+                            struct btrfs_root *root)
+{
+        struct btrfs_block_group_cache *cache, *tmp;
+        struct btrfs_transaction *cur_trans = trans->transaction;
+        struct btrfs_path *path;
+
+        if (list_empty(&cur_trans->dirty_bgs) ||
+            !btrfs_test_opt(root, SPACE_CACHE))
+                return 0;
+
+        path = btrfs_alloc_path();
+        if (!path)
+                return -ENOMEM;
+
+        /* Could add new block groups, use _safe just in case */
+        list_for_each_entry_safe(cache, tmp, &cur_trans->dirty_bgs,
+                                 dirty_list) {
+                if (cache->disk_cache_state == BTRFS_DC_CLEAR)
+                        cache_save_setup(cache, trans, path);
+        }
+
+        btrfs_free_path(path);
+        return 0;
+}
+
 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
                                    struct btrfs_root *root)
 {
@@ -5110,7 +5136,11 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
         num_bytes = ALIGN(num_bytes, root->sectorsize);
 
         spin_lock(&BTRFS_I(inode)->lock);
-        BTRFS_I(inode)->outstanding_extents++;
+        nr_extents = (unsigned)div64_u64(num_bytes +
+                                         BTRFS_MAX_EXTENT_SIZE - 1,
+                                         BTRFS_MAX_EXTENT_SIZE);
+        BTRFS_I(inode)->outstanding_extents += nr_extents;
+        nr_extents = 0;
 
         if (BTRFS_I(inode)->outstanding_extents >
             BTRFS_I(inode)->reserved_extents)
@@ -5255,6 +5285,9 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
         if (dropped > 0)
                 to_free += btrfs_calc_trans_metadata_size(root, dropped);
 
+        if (btrfs_test_is_dummy_root(root))
+                return;
+
         trace_btrfs_space_reservation(root->fs_info, "delalloc",
                                       btrfs_ino(inode), to_free, 0);
         if (root->fs_info->quota_enabled) {
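
btrfs_delalloc_reserve_metadata() now charges one outstanding extent per BTRFS_MAX_EXTENT_SIZE worth of bytes, i.e. a ceiling division, rather than always adding one. The sketch below shows the same rounding-up division; the 128 MiB constant is what BTRFS_MAX_EXTENT_SIZE is assumed to be here and is not taken from this diff.

#include <stdio.h>
#include <stdint.h>

#define MAX_EXTENT_SIZE (128ULL * 1024 * 1024)   /* assumed BTRFS_MAX_EXTENT_SIZE */

static uint64_t extents_for(uint64_t bytes)
{
        /* mirrors div64_u64(bytes + MAX - 1, MAX): ceiling division */
        return (bytes + MAX_EXTENT_SIZE - 1) / MAX_EXTENT_SIZE;
}

int main(void)
{
        printf("%llu\n", (unsigned long long)extents_for(4096));                 /* 1 */
        printf("%llu\n", (unsigned long long)extents_for(MAX_EXTENT_SIZE));      /* 1 */
        printf("%llu\n", (unsigned long long)extents_for(MAX_EXTENT_SIZE + 1));  /* 2 */
        return 0;
}
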
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index c7233ff1d533..d688cfe5d496 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4968,6 +4968,12 @@ static int release_extent_buffer(struct extent_buffer *eb)
 
                 /* Should be safe to release our pages at this point */
                 btrfs_release_extent_buffer_page(eb);
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+                if (unlikely(test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags))) {
+                        __free_extent_buffer(eb);
+                        return 1;
+                }
+#endif
                 call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu);
                 return 1;
         }
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 30982bbd31c3..aee18f84e315 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -24,7 +24,6 @@
 #include <linux/string.h>
 #include <linux/backing-dev.h>
 #include <linux/mpage.h>
-#include <linux/aio.h>
 #include <linux/falloc.h>
 #include <linux/swap.h>
 #include <linux/writeback.h>
@@ -32,6 +31,7 @@
 #include <linux/compat.h>
 #include <linux/slab.h>
 #include <linux/btrfs.h>
+#include <linux/uio.h>
 #include "ctree.h"
 #include "disk-io.h"
 #include "transaction.h"
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index da828cf5e8f8..686331f22b15 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -32,7 +32,6 @@
 #include <linux/writeback.h>
 #include <linux/statfs.h>
 #include <linux/compat.h>
-#include <linux/aio.h>
 #include <linux/bit_spinlock.h>
 #include <linux/xattr.h>
 #include <linux/posix_acl.h>
@@ -43,6 +42,7 @@
 #include <linux/btrfs.h>
 #include <linux/blkdev.h>
 #include <linux/posix_acl_xattr.h>
+#include <linux/uio.h>
 #include "ctree.h"
 #include "disk-io.h"
 #include "transaction.h"
@@ -108,6 +108,13 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
 
 static int btrfs_dirty_inode(struct inode *inode);
 
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+void btrfs_test_inode_set_ops(struct inode *inode)
+{
+        BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
+}
+#endif
+
 static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
                                      struct inode *inode, struct inode *dir,
                                      const struct qstr *qstr)
@@ -1542,30 +1549,17 @@ static void btrfs_split_extent_hook(struct inode *inode,
                 u64 new_size;
 
                 /*
-                 * We need the largest size of the remaining extent to see if we
-                 * need to add a new outstanding extent. Think of the following
-                 * case
-                 *
-                 * [MEAX_EXTENT_SIZEx2 - 4k][4k]
-                 *
-                 * The new_size would just be 4k and we'd think we had enough
-                 * outstanding extents for this if we only took one side of the
-                 * split, same goes for the other direction. We need to see if
-                 * the larger size still is the same amount of extents as the
-                 * original size, because if it is we need to add a new
-                 * outstanding extent. But if we split up and the larger size
-                 * is less than the original then we are good to go since we've
-                 * already accounted for the extra extent in our original
-                 * accounting.
+                 * See the explanation in btrfs_merge_extent_hook, the same
+                 * applies here, just in reverse.
                  */
                 new_size = orig->end - split + 1;
-                if ((split - orig->start) > new_size)
-                        new_size = split - orig->start;
-
-                num_extents = div64_u64(size + BTRFS_MAX_EXTENT_SIZE - 1,
+                num_extents = div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
                                         BTRFS_MAX_EXTENT_SIZE);
-                if (div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
-                              BTRFS_MAX_EXTENT_SIZE) < num_extents)
+                new_size = split - orig->start;
+                num_extents += div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
+                                         BTRFS_MAX_EXTENT_SIZE);
+                if (div64_u64(size + BTRFS_MAX_EXTENT_SIZE - 1,
+                              BTRFS_MAX_EXTENT_SIZE) >= num_extents)
                         return;
         }
 
@@ -1591,8 +1585,10 @@ static void btrfs_merge_extent_hook(struct inode *inode,
         if (!(other->state & EXTENT_DELALLOC))
                 return;
 
-        old_size = other->end - other->start + 1;
-        new_size = old_size + (new->end - new->start + 1);
+        if (new->start > other->start)
+                new_size = new->end - other->start + 1;
+        else
+                new_size = other->end - new->start + 1;
 
         /* we're not bigger than the max, unreserve the space and go */
         if (new_size <= BTRFS_MAX_EXTENT_SIZE) {
@@ -1603,13 +1599,32 @@ static void btrfs_merge_extent_hook(struct inode *inode,
         }
 
         /*
-         * If we grew by another max_extent, just return, we want to keep that
-         * reserved amount.
+         * We have to add up either side to figure out how many extents were
+         * accounted for before we merged into one big extent. If the number of
+         * extents we accounted for is <= the amount we need for the new range
+         * then we can return, otherwise drop. Think of it like this
+         *
+         * [ 4k][MAX_SIZE]
+         *
+         * So we've grown the extent by a MAX_SIZE extent, this would mean we
+         * need 2 outstanding extents, on one side we have 1 and the other side
+         * we have 1 so they are == and we can return. But in this case
+         *
+         * [MAX_SIZE+4k][MAX_SIZE+4k]
+         *
+         * Each range on their own accounts for 2 extents, but merged together
+         * they are only 3 extents worth of accounting, so we need to drop in
+         * this case.
          */
+        old_size = other->end - other->start + 1;
         num_extents = div64_u64(old_size + BTRFS_MAX_EXTENT_SIZE - 1,
                                 BTRFS_MAX_EXTENT_SIZE);
+        old_size = new->end - new->start + 1;
+        num_extents += div64_u64(old_size + BTRFS_MAX_EXTENT_SIZE - 1,
+                                 BTRFS_MAX_EXTENT_SIZE);
+
         if (div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
-                      BTRFS_MAX_EXTENT_SIZE) > num_extents)
+                      BTRFS_MAX_EXTENT_SIZE) >= num_extents)
                 return;
 
         spin_lock(&BTRFS_I(inode)->lock);
@@ -1686,6 +1701,10 @@ static void btrfs_set_bit_hook(struct inode *inode,
                         spin_unlock(&BTRFS_I(inode)->lock);
                 }
 
+                /* For sanity tests */
+                if (btrfs_test_is_dummy_root(root))
+                        return;
+
                 __percpu_counter_add(&root->fs_info->delalloc_bytes, len,
                                      root->fs_info->delalloc_batch);
                 spin_lock(&BTRFS_I(inode)->lock);
@@ -1741,6 +1760,10 @@ static void btrfs_clear_bit_hook(struct inode *inode,
                     root != root->fs_info->tree_root)
                         btrfs_delalloc_release_metadata(inode, len);
 
+                /* For sanity tests. */
+                if (btrfs_test_is_dummy_root(root))
+                        return;
+
                 if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
                     && do_list && !(state->state & EXTENT_NORESERVE))
                         btrfs_free_reserved_data_space(inode, len);
@@ -7213,7 +7236,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
         u64 start = iblock << inode->i_blkbits;
         u64 lockstart, lockend;
         u64 len = bh_result->b_size;
-        u64 orig_len = len;
+        u64 *outstanding_extents = NULL;
         int unlock_bits = EXTENT_LOCKED;
         int ret = 0;
 
@@ -7225,6 +7248,16 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
         lockstart = start;
         lockend = start + len - 1;
 
+        if (current->journal_info) {
+                /*
+                 * Need to pull our outstanding extents and set journal_info to NULL so
+                 * that anything that needs to check if there's a transction doesn't get
+                 * confused.
+                 */
+                outstanding_extents = current->journal_info;
+                current->journal_info = NULL;
+        }
+
         /*
          * If this errors out it's because we couldn't invalidate pagecache for
          * this range and we need to fallback to buffered.
@@ -7348,11 +7381,20 @@ unlock:
                 if (start + len > i_size_read(inode))
                         i_size_write(inode, start + len);
 
-                if (len < orig_len) {
+                /*
+                 * If we have an outstanding_extents count still set then we're
+                 * within our reservation, otherwise we need to adjust our inode
+                 * counter appropriately.
+                 */
+                if (*outstanding_extents) {
+                        (*outstanding_extents)--;
+                } else {
                         spin_lock(&BTRFS_I(inode)->lock);
                         BTRFS_I(inode)->outstanding_extents++;
                         spin_unlock(&BTRFS_I(inode)->lock);
                 }
+
+                current->journal_info = outstanding_extents;
                 btrfs_free_reserved_data_space(inode, len);
         }
 
@@ -7376,6 +7418,8 @@ unlock:
 unlock_err:
         clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
                          unlock_bits, 1, 0, &cached_state, GFP_NOFS);
+        if (outstanding_extents)
+                current->journal_info = outstanding_extents;
         return ret;
 }
 
| @@ -8075,6 +8119,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, | |||
| 8075 | { | 8119 | { |
| 8076 | struct file *file = iocb->ki_filp; | 8120 | struct file *file = iocb->ki_filp; |
| 8077 | struct inode *inode = file->f_mapping->host; | 8121 | struct inode *inode = file->f_mapping->host; |
| 8122 | u64 outstanding_extents = 0; | ||
| 8078 | size_t count = 0; | 8123 | size_t count = 0; |
| 8079 | int flags = 0; | 8124 | int flags = 0; |
| 8080 | bool wakeup = true; | 8125 | bool wakeup = true; |
| @@ -8112,6 +8157,16 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, | |||
| 8112 | ret = btrfs_delalloc_reserve_space(inode, count); | 8157 | ret = btrfs_delalloc_reserve_space(inode, count); |
| 8113 | if (ret) | 8158 | if (ret) |
| 8114 | goto out; | 8159 | goto out; |
| 8160 | outstanding_extents = div64_u64(count + | ||
| 8161 | BTRFS_MAX_EXTENT_SIZE - 1, | ||
| 8162 | BTRFS_MAX_EXTENT_SIZE); | ||
| 8163 | |||
| 8164 | /* | ||
| 8165 | * We need to know how many extents we reserved so that we can | ||
| 8166 | * do the accounting properly if we go over the number we | ||
| 8167 | * originally calculated. Abuse current->journal_info for this. | ||
| 8168 | */ | ||
| 8169 | current->journal_info = &outstanding_extents; | ||
| 8115 | } else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK, | 8170 | } else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK, |
| 8116 | &BTRFS_I(inode)->runtime_flags)) { | 8171 | &BTRFS_I(inode)->runtime_flags)) { |
| 8117 | inode_dio_done(inode); | 8172 | inode_dio_done(inode); |
| @@ -8124,6 +8179,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, | |||
| 8124 | iter, offset, btrfs_get_blocks_direct, NULL, | 8179 | iter, offset, btrfs_get_blocks_direct, NULL, |
| 8125 | btrfs_submit_direct, flags); | 8180 | btrfs_submit_direct, flags); |
| 8126 | if (rw & WRITE) { | 8181 | if (rw & WRITE) { |
| 8182 | current->journal_info = NULL; | ||
| 8127 | if (ret < 0 && ret != -EIOCBQUEUED) | 8183 | if (ret < 0 && ret != -EIOCBQUEUED) |
| 8128 | btrfs_delalloc_release_space(inode, count); | 8184 | btrfs_delalloc_release_space(inode, count); |
| 8129 | else if (ret >= 0 && (size_t)ret < count) | 8185 | else if (ret >= 0 && (size_t)ret < count) |
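Editor's note: the direct IO hunks above reserve one extent per started BTRFS_MAX_EXTENT_SIZE chunk and park that count in current->journal_info so btrfs_get_blocks_direct can decrement it as real extents are mapped, falling back to bumping outstanding_extents once the estimate runs out. A minimal userspace sketch of the rounding arithmetic only, assuming 128 MiB for the maximum extent size (the constant's value is not shown in this diff):

#include <stdint.h>
#include <stdio.h>

/* Stand-in for BTRFS_MAX_EXTENT_SIZE; 128 MiB assumed for illustration. */
#define MAX_EXTENT_SIZE (128ULL * 1024 * 1024)

/* Mirrors the div64_u64() call above: every started chunk costs one
 * reserved extent. */
static uint64_t extents_for(uint64_t count)
{
	return (count + MAX_EXTENT_SIZE - 1) / MAX_EXTENT_SIZE;
}

int main(void)
{
	/* A write one page past the max extent size needs two reservations. */
	printf("%llu\n", (unsigned long long)extents_for(MAX_EXTENT_SIZE + 4096));
	return 0;
}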
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 97159a8e91d4..058c79eecbfb 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c | |||
| @@ -1259,7 +1259,7 @@ static int comp_oper(struct btrfs_qgroup_operation *oper1, | |||
| 1259 | if (oper1->seq < oper2->seq) | 1259 | if (oper1->seq < oper2->seq) |
| 1260 | return -1; | 1260 | return -1; |
| 1261 | if (oper1->seq > oper2->seq) | 1261 | if (oper1->seq > oper2->seq) |
| 1262 | return -1; | 1262 | return 1; |
| 1263 | if (oper1->ref_root < oper2->ref_root) | 1263 | if (oper1->ref_root < oper2->ref_root) |
| 1264 | return -1; | 1264 | return -1; |
| 1265 | if (oper1->ref_root > oper2->ref_root) | 1265 | if (oper1->ref_root > oper2->ref_root) |
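Editor's note: the one-character qgroup fix restores antisymmetry in the operation comparator; before it, both the less-than and greater-than branches of the seq test returned -1, so the ordering depended on argument order. A hedged sketch of the intended three-way compare, using a stand-in struct rather than btrfs_qgroup_operation:

/* Illustrative comparator with the same shape as comp_oper(). */
struct oper {
	unsigned long long seq;
	unsigned long long ref_root;
};

static int comp_oper_sketch(const struct oper *a, const struct oper *b)
{
	if (a->seq < b->seq)
		return -1;
	if (a->seq > b->seq)
		return 1;	/* returning -1 here (the old bug) breaks the ordering */
	if (a->ref_root < b->ref_root)
		return -1;
	if (a->ref_root > b->ref_root)
		return 1;
	return 0;
}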
diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c index a116b55ce788..054fc0d97131 100644 --- a/fs/btrfs/tests/inode-tests.c +++ b/fs/btrfs/tests/inode-tests.c | |||
| @@ -911,6 +911,197 @@ out: | |||
| 911 | return ret; | 911 | return ret; |
| 912 | } | 912 | } |
| 913 | 913 | ||
| 914 | static int test_extent_accounting(void) | ||
| 915 | { | ||
| 916 | struct inode *inode = NULL; | ||
| 917 | struct btrfs_root *root = NULL; | ||
| 918 | int ret = -ENOMEM; | ||
| 919 | |||
| 920 | inode = btrfs_new_test_inode(); | ||
| 921 | if (!inode) { | ||
| 922 | test_msg("Couldn't allocate inode\n"); | ||
| 923 | return ret; | ||
| 924 | } | ||
| 925 | |||
| 926 | root = btrfs_alloc_dummy_root(); | ||
| 927 | if (IS_ERR(root)) { | ||
| 928 | test_msg("Couldn't allocate root\n"); | ||
| 929 | goto out; | ||
| 930 | } | ||
| 931 | |||
| 932 | root->fs_info = btrfs_alloc_dummy_fs_info(); | ||
| 933 | if (!root->fs_info) { | ||
| 934 | test_msg("Couldn't allocate dummy fs info\n"); | ||
| 935 | goto out; | ||
| 936 | } | ||
| 937 | |||
| 938 | BTRFS_I(inode)->root = root; | ||
| 939 | btrfs_test_inode_set_ops(inode); | ||
| 940 | |||
| 941 | /* [BTRFS_MAX_EXTENT_SIZE] */ | ||
| 942 | BTRFS_I(inode)->outstanding_extents++; | ||
| 943 | ret = btrfs_set_extent_delalloc(inode, 0, BTRFS_MAX_EXTENT_SIZE - 1, | ||
| 944 | NULL); | ||
| 945 | if (ret) { | ||
| 946 | test_msg("btrfs_set_extent_delalloc returned %d\n", ret); | ||
| 947 | goto out; | ||
| 948 | } | ||
| 949 | if (BTRFS_I(inode)->outstanding_extents != 1) { | ||
| 950 | ret = -EINVAL; | ||
| 951 | test_msg("Miscount, wanted 1, got %u\n", | ||
| 952 | BTRFS_I(inode)->outstanding_extents); | ||
| 953 | goto out; | ||
| 954 | } | ||
| 955 | |||
| 956 | /* [BTRFS_MAX_EXTENT_SIZE][4k] */ | ||
| 957 | BTRFS_I(inode)->outstanding_extents++; | ||
| 958 | ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE, | ||
| 959 | BTRFS_MAX_EXTENT_SIZE + 4095, NULL); | ||
| 960 | if (ret) { | ||
| 961 | test_msg("btrfs_set_extent_delalloc returned %d\n", ret); | ||
| 962 | goto out; | ||
| 963 | } | ||
| 964 | if (BTRFS_I(inode)->outstanding_extents != 2) { | ||
| 965 | ret = -EINVAL; | ||
| 966 | test_msg("Miscount, wanted 2, got %u\n", | ||
| 967 | BTRFS_I(inode)->outstanding_extents); | ||
| 968 | goto out; | ||
| 969 | } | ||
| 970 | |||
| 971 | /* [BTRFS_MAX_EXTENT_SIZE/2][4K HOLE][the rest] */ | ||
| 972 | ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, | ||
| 973 | BTRFS_MAX_EXTENT_SIZE >> 1, | ||
| 974 | (BTRFS_MAX_EXTENT_SIZE >> 1) + 4095, | ||
| 975 | EXTENT_DELALLOC | EXTENT_DIRTY | | ||
| 976 | EXTENT_UPTODATE | EXTENT_DO_ACCOUNTING, 0, 0, | ||
| 977 | NULL, GFP_NOFS); | ||
| 978 | if (ret) { | ||
| 979 | test_msg("clear_extent_bit returned %d\n", ret); | ||
| 980 | goto out; | ||
| 981 | } | ||
| 982 | if (BTRFS_I(inode)->outstanding_extents != 2) { | ||
| 983 | ret = -EINVAL; | ||
| 984 | test_msg("Miscount, wanted 2, got %u\n", | ||
| 985 | BTRFS_I(inode)->outstanding_extents); | ||
| 986 | goto out; | ||
| 987 | } | ||
| 988 | |||
| 989 | /* [BTRFS_MAX_EXTENT_SIZE][4K] */ | ||
| 990 | BTRFS_I(inode)->outstanding_extents++; | ||
| 991 | ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE >> 1, | ||
| 992 | (BTRFS_MAX_EXTENT_SIZE >> 1) + 4095, | ||
| 993 | NULL); | ||
| 994 | if (ret) { | ||
| 995 | test_msg("btrfs_set_extent_delalloc returned %d\n", ret); | ||
| 996 | goto out; | ||
| 997 | } | ||
| 998 | if (BTRFS_I(inode)->outstanding_extents != 2) { | ||
| 999 | ret = -EINVAL; | ||
| 1000 | test_msg("Miscount, wanted 2, got %u\n", | ||
| 1001 | BTRFS_I(inode)->outstanding_extents); | ||
| 1002 | goto out; | ||
| 1003 | } | ||
| 1004 | |||
| 1005 | /* | ||
| 1006 | * [BTRFS_MAX_EXTENT_SIZE+4K][4K HOLE][BTRFS_MAX_EXTENT_SIZE+4K] | ||
| 1007 | * | ||
| 1008 | * I'm artificially adding 2 to outstanding_extents because in the | ||
| 1009 | * buffered IO case we'd add things up as we go, but I don't feel like | ||
| 1010 | * doing that here, this isn't the interesting case we want to test. | ||
| 1011 | */ | ||
| 1012 | BTRFS_I(inode)->outstanding_extents += 2; | ||
| 1013 | ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE + 8192, | ||
| 1014 | (BTRFS_MAX_EXTENT_SIZE << 1) + 12287, | ||
| 1015 | NULL); | ||
| 1016 | if (ret) { | ||
| 1017 | test_msg("btrfs_set_extent_delalloc returned %d\n", ret); | ||
| 1018 | goto out; | ||
| 1019 | } | ||
| 1020 | if (BTRFS_I(inode)->outstanding_extents != 4) { | ||
| 1021 | ret = -EINVAL; | ||
| 1022 | test_msg("Miscount, wanted 4, got %u\n", | ||
| 1023 | BTRFS_I(inode)->outstanding_extents); | ||
| 1024 | goto out; | ||
| 1025 | } | ||
| 1026 | |||
| 1027 | /* [BTRFS_MAX_EXTENT_SIZE+4k][4k][BTRFS_MAX_EXTENT_SIZE+4k] */ | ||
| 1028 | BTRFS_I(inode)->outstanding_extents++; | ||
| 1029 | ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE+4096, | ||
| 1030 | BTRFS_MAX_EXTENT_SIZE+8191, NULL); | ||
| 1031 | if (ret) { | ||
| 1032 | test_msg("btrfs_set_extent_delalloc returned %d\n", ret); | ||
| 1033 | goto out; | ||
| 1034 | } | ||
| 1035 | if (BTRFS_I(inode)->outstanding_extents != 3) { | ||
| 1036 | ret = -EINVAL; | ||
| 1037 | test_msg("Miscount, wanted 3, got %u\n", | ||
| 1038 | BTRFS_I(inode)->outstanding_extents); | ||
| 1039 | goto out; | ||
| 1040 | } | ||
| 1041 | |||
| 1042 | /* [BTRFS_MAX_EXTENT_SIZE+4k][4K HOLE][BTRFS_MAX_EXTENT_SIZE+4k] */ | ||
| 1043 | ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, | ||
| 1044 | BTRFS_MAX_EXTENT_SIZE+4096, | ||
| 1045 | BTRFS_MAX_EXTENT_SIZE+8191, | ||
| 1046 | EXTENT_DIRTY | EXTENT_DELALLOC | | ||
| 1047 | EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0, | ||
| 1048 | NULL, GFP_NOFS); | ||
| 1049 | if (ret) { | ||
| 1050 | test_msg("clear_extent_bit returned %d\n", ret); | ||
| 1051 | goto out; | ||
| 1052 | } | ||
| 1053 | if (BTRFS_I(inode)->outstanding_extents != 4) { | ||
| 1054 | ret = -EINVAL; | ||
| 1055 | test_msg("Miscount, wanted 4, got %u\n", | ||
| 1056 | BTRFS_I(inode)->outstanding_extents); | ||
| 1057 | goto out; | ||
| 1058 | } | ||
| 1059 | |||
| 1060 | /* | ||
| 1061 | * Refill the hole again just for good measure, because I thought it | ||
| 1062 | * might fail and I'd rather satisfy my paranoia at this point. | ||
| 1063 | */ | ||
| 1064 | BTRFS_I(inode)->outstanding_extents++; | ||
| 1065 | ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE+4096, | ||
| 1066 | BTRFS_MAX_EXTENT_SIZE+8191, NULL); | ||
| 1067 | if (ret) { | ||
| 1068 | test_msg("btrfs_set_extent_delalloc returned %d\n", ret); | ||
| 1069 | goto out; | ||
| 1070 | } | ||
| 1071 | if (BTRFS_I(inode)->outstanding_extents != 3) { | ||
| 1072 | ret = -EINVAL; | ||
| 1073 | test_msg("Miscount, wanted 3, got %u\n", | ||
| 1074 | BTRFS_I(inode)->outstanding_extents); | ||
| 1075 | goto out; | ||
| 1076 | } | ||
| 1077 | |||
| 1078 | /* Empty */ | ||
| 1079 | ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1, | ||
| 1080 | EXTENT_DIRTY | EXTENT_DELALLOC | | ||
| 1081 | EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0, | ||
| 1082 | NULL, GFP_NOFS); | ||
| 1083 | if (ret) { | ||
| 1084 | test_msg("clear_extent_bit returned %d\n", ret); | ||
| 1085 | goto out; | ||
| 1086 | } | ||
| 1087 | if (BTRFS_I(inode)->outstanding_extents) { | ||
| 1088 | ret = -EINVAL; | ||
| 1089 | test_msg("Miscount, wanted 0, got %u\n", | ||
| 1090 | BTRFS_I(inode)->outstanding_extents); | ||
| 1091 | goto out; | ||
| 1092 | } | ||
| 1093 | ret = 0; | ||
| 1094 | out: | ||
| 1095 | if (ret) | ||
| 1096 | clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1, | ||
| 1097 | EXTENT_DIRTY | EXTENT_DELALLOC | | ||
| 1098 | EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0, | ||
| 1099 | NULL, GFP_NOFS); | ||
| 1100 | iput(inode); | ||
| 1101 | btrfs_free_dummy_root(root); | ||
| 1102 | return ret; | ||
| 1103 | } | ||
| 1104 | |||
| 914 | int btrfs_test_inodes(void) | 1105 | int btrfs_test_inodes(void) |
| 915 | { | 1106 | { |
| 916 | int ret; | 1107 | int ret; |
| @@ -924,5 +1115,9 @@ int btrfs_test_inodes(void) | |||
| 924 | if (ret) | 1115 | if (ret) |
| 925 | return ret; | 1116 | return ret; |
| 926 | test_msg("Running hole first btrfs_get_extent test\n"); | 1117 | test_msg("Running hole first btrfs_get_extent test\n"); |
| 927 | return test_hole_first(); | 1118 | ret = test_hole_first(); |
| 1119 | if (ret) | ||
| 1120 | return ret; | ||
| 1121 | test_msg("Running outstanding_extents tests\n"); | ||
| 1122 | return test_extent_accounting(); | ||
| 928 | } | 1123 | } |
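Editor's note: the expected counts in the new test follow from one rule: each contiguous delalloc range costs ceil(len / BTRFS_MAX_EXTENT_SIZE) outstanding extents. The sketch below reproduces just that arithmetic for the [MAX][hole][MAX]-style layouts; it deliberately ignores the merge/split bookkeeping the real extent io tree does, and again assumes 128 MiB for the maximum extent size:

#include <stdint.h>
#include <stdio.h>

#define MAX_EXTENT_SIZE (128ULL * 1024 * 1024)	/* assumed value */

struct range { uint64_t start, len; };		/* contiguous delalloc range */

static uint64_t expected_outstanding(const struct range *r, int n)
{
	uint64_t total = 0;
	int i;

	for (i = 0; i < n; i++)
		total += (r[i].len + MAX_EXTENT_SIZE - 1) / MAX_EXTENT_SIZE;
	return total;
}

int main(void)
{
	/* [BTRFS_MAX_EXTENT_SIZE+4k][4K HOLE][BTRFS_MAX_EXTENT_SIZE+4k] */
	struct range r[] = {
		{ 0,                      MAX_EXTENT_SIZE + 4096 },
		{ MAX_EXTENT_SIZE + 8192, MAX_EXTENT_SIZE + 4096 },
	};
	printf("%llu\n", (unsigned long long)expected_outstanding(r, 2)); /* 4 */
	return 0;
}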
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 88e51aded6bd..8be4278e25e8 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
| @@ -1023,17 +1023,13 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans, | |||
| 1023 | u64 old_root_bytenr; | 1023 | u64 old_root_bytenr; |
| 1024 | u64 old_root_used; | 1024 | u64 old_root_used; |
| 1025 | struct btrfs_root *tree_root = root->fs_info->tree_root; | 1025 | struct btrfs_root *tree_root = root->fs_info->tree_root; |
| 1026 | bool extent_root = (root->objectid == BTRFS_EXTENT_TREE_OBJECTID); | ||
| 1027 | 1026 | ||
| 1028 | old_root_used = btrfs_root_used(&root->root_item); | 1027 | old_root_used = btrfs_root_used(&root->root_item); |
| 1029 | btrfs_write_dirty_block_groups(trans, root); | ||
| 1030 | 1028 | ||
| 1031 | while (1) { | 1029 | while (1) { |
| 1032 | old_root_bytenr = btrfs_root_bytenr(&root->root_item); | 1030 | old_root_bytenr = btrfs_root_bytenr(&root->root_item); |
| 1033 | if (old_root_bytenr == root->node->start && | 1031 | if (old_root_bytenr == root->node->start && |
| 1034 | old_root_used == btrfs_root_used(&root->root_item) && | 1032 | old_root_used == btrfs_root_used(&root->root_item)) |
| 1035 | (!extent_root || | ||
| 1036 | list_empty(&trans->transaction->dirty_bgs))) | ||
| 1037 | break; | 1033 | break; |
| 1038 | 1034 | ||
| 1039 | btrfs_set_root_node(&root->root_item, root->node); | 1035 | btrfs_set_root_node(&root->root_item, root->node); |
| @@ -1044,14 +1040,6 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans, | |||
| 1044 | return ret; | 1040 | return ret; |
| 1045 | 1041 | ||
| 1046 | old_root_used = btrfs_root_used(&root->root_item); | 1042 | old_root_used = btrfs_root_used(&root->root_item); |
| 1047 | if (extent_root) { | ||
| 1048 | ret = btrfs_write_dirty_block_groups(trans, root); | ||
| 1049 | if (ret) | ||
| 1050 | return ret; | ||
| 1051 | } | ||
| 1052 | ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); | ||
| 1053 | if (ret) | ||
| 1054 | return ret; | ||
| 1055 | } | 1043 | } |
| 1056 | 1044 | ||
| 1057 | return 0; | 1045 | return 0; |
| @@ -1068,6 +1056,7 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans, | |||
| 1068 | struct btrfs_root *root) | 1056 | struct btrfs_root *root) |
| 1069 | { | 1057 | { |
| 1070 | struct btrfs_fs_info *fs_info = root->fs_info; | 1058 | struct btrfs_fs_info *fs_info = root->fs_info; |
| 1059 | struct list_head *dirty_bgs = &trans->transaction->dirty_bgs; | ||
| 1071 | struct list_head *next; | 1060 | struct list_head *next; |
| 1072 | struct extent_buffer *eb; | 1061 | struct extent_buffer *eb; |
| 1073 | int ret; | 1062 | int ret; |
| @@ -1095,11 +1084,15 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans, | |||
| 1095 | if (ret) | 1084 | if (ret) |
| 1096 | return ret; | 1085 | return ret; |
| 1097 | 1086 | ||
| 1087 | ret = btrfs_setup_space_cache(trans, root); | ||
| 1088 | if (ret) | ||
| 1089 | return ret; | ||
| 1090 | |||
| 1098 | /* run_qgroups might have added some more refs */ | 1091 | /* run_qgroups might have added some more refs */ |
| 1099 | ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); | 1092 | ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); |
| 1100 | if (ret) | 1093 | if (ret) |
| 1101 | return ret; | 1094 | return ret; |
| 1102 | 1095 | again: | |
| 1103 | while (!list_empty(&fs_info->dirty_cowonly_roots)) { | 1096 | while (!list_empty(&fs_info->dirty_cowonly_roots)) { |
| 1104 | next = fs_info->dirty_cowonly_roots.next; | 1097 | next = fs_info->dirty_cowonly_roots.next; |
| 1105 | list_del_init(next); | 1098 | list_del_init(next); |
| @@ -1112,8 +1105,23 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans, | |||
| 1112 | ret = update_cowonly_root(trans, root); | 1105 | ret = update_cowonly_root(trans, root); |
| 1113 | if (ret) | 1106 | if (ret) |
| 1114 | return ret; | 1107 | return ret; |
| 1108 | ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); | ||
| 1109 | if (ret) | ||
| 1110 | return ret; | ||
| 1115 | } | 1111 | } |
| 1116 | 1112 | ||
| 1113 | while (!list_empty(dirty_bgs)) { | ||
| 1114 | ret = btrfs_write_dirty_block_groups(trans, root); | ||
| 1115 | if (ret) | ||
| 1116 | return ret; | ||
| 1117 | ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); | ||
| 1118 | if (ret) | ||
| 1119 | return ret; | ||
| 1120 | } | ||
| 1121 | |||
| 1122 | if (!list_empty(&fs_info->dirty_cowonly_roots)) | ||
| 1123 | goto again; | ||
| 1124 | |||
| 1117 | list_add_tail(&fs_info->extent_root->dirty_list, | 1125 | list_add_tail(&fs_info->extent_root->dirty_list, |
| 1118 | &trans->transaction->switch_commits); | 1126 | &trans->transaction->switch_commits); |
| 1119 | btrfs_after_dev_replace_commit(fs_info); | 1127 | btrfs_after_dev_replace_commit(fs_info); |
| @@ -1811,6 +1819,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 1811 | 1819 | ||
| 1812 | wait_for_commit(root, cur_trans); | 1820 | wait_for_commit(root, cur_trans); |
| 1813 | 1821 | ||
| 1822 | if (unlikely(cur_trans->aborted)) | ||
| 1823 | ret = cur_trans->aborted; | ||
| 1824 | |||
| 1814 | btrfs_put_transaction(cur_trans); | 1825 | btrfs_put_transaction(cur_trans); |
| 1815 | 1826 | ||
| 1816 | return ret; | 1827 | return ret; |
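Editor's note: the reworked commit_cowonly_roots() turns block-group writing into a convergence loop: writing dirty block groups and running delayed refs can dirty the extent root again, so the function keeps cycling until both work lists drain. A compilable toy version of that control flow, with made-up counters standing in for the kernel lists:

#include <stdio.h>

struct work {
	int dirty_roots;	/* stands in for fs_info->dirty_cowonly_roots */
	int dirty_bgs;		/* stands in for transaction->dirty_bgs */
};

static void process_one_root(struct work *w)
{
	w->dirty_roots--;
	w->dirty_bgs++;		/* updating a root queues block-group work */
}

static void process_dirty_bgs(struct work *w)
{
	w->dirty_bgs = 0;	/* in the kernel this can re-dirty a root */
}

static void commit_cowonly(struct work *w)
{
again:
	while (w->dirty_roots)
		process_one_root(w);
	while (w->dirty_bgs)
		process_dirty_bgs(w);
	if (w->dirty_roots)	/* anything re-dirtied? start over */
		goto again;
}

int main(void)
{
	struct work w = { .dirty_roots = 3, .dirty_bgs = 0 };
	commit_cowonly(&w);
	printf("%d %d\n", w.dirty_roots, w.dirty_bgs);	/* 0 0 */
	return 0;
}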
diff --git a/fs/buffer.c b/fs/buffer.c index 20805db2c987..c7a5602d01ee 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
| @@ -3243,8 +3243,8 @@ int try_to_free_buffers(struct page *page) | |||
| 3243 | * to synchronise against __set_page_dirty_buffers and prevent the | 3243 | * to synchronise against __set_page_dirty_buffers and prevent the |
| 3244 | * dirty bit from being lost. | 3244 | * dirty bit from being lost. |
| 3245 | */ | 3245 | */ |
| 3246 | if (ret) | 3246 | if (ret && TestClearPageDirty(page)) |
| 3247 | cancel_dirty_page(page, PAGE_CACHE_SIZE); | 3247 | account_page_cleaned(page, mapping); |
| 3248 | spin_unlock(&mapping->private_lock); | 3248 | spin_unlock(&mapping->private_lock); |
| 3249 | out: | 3249 | out: |
| 3250 | if (buffers_to_free) { | 3250 | if (buffers_to_free) { |
diff --git a/fs/ceph/file.c b/fs/ceph/file.c index d533075a823d..139f2fea91a0 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c | |||
| @@ -7,7 +7,6 @@ | |||
| 7 | #include <linux/mount.h> | 7 | #include <linux/mount.h> |
| 8 | #include <linux/namei.h> | 8 | #include <linux/namei.h> |
| 9 | #include <linux/writeback.h> | 9 | #include <linux/writeback.h> |
| 10 | #include <linux/aio.h> | ||
| 11 | #include <linux/falloc.h> | 10 | #include <linux/falloc.h> |
| 12 | 11 | ||
| 13 | #include "super.h" | 12 | #include "super.h" |
| @@ -808,7 +807,7 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to) | |||
| 808 | { | 807 | { |
| 809 | struct file *filp = iocb->ki_filp; | 808 | struct file *filp = iocb->ki_filp; |
| 810 | struct ceph_file_info *fi = filp->private_data; | 809 | struct ceph_file_info *fi = filp->private_data; |
| 811 | size_t len = iocb->ki_nbytes; | 810 | size_t len = iov_iter_count(to); |
| 812 | struct inode *inode = file_inode(filp); | 811 | struct inode *inode = file_inode(filp); |
| 813 | struct ceph_inode_info *ci = ceph_inode(inode); | 812 | struct ceph_inode_info *ci = ceph_inode(inode); |
| 814 | struct page *pinned_page = NULL; | 813 | struct page *pinned_page = NULL; |
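Editor's note: the ceph change is part of the same ki_nbytes removal: a ->read_iter() handler now derives the requested length from the iov_iter itself. A hedged kernel-style fragment of that pattern (the body is elided and is not ceph code):

#include <linux/fs.h>
#include <linux/uio.h>

static ssize_t example_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	size_t len = iov_iter_count(to);	/* bytes the caller asked for */

	/* ... copy up to 'len' bytes at iocb->ki_pos into 'to' ... */
	return 0;
}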
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index 4ac7445e6ec7..aa0dc2573374 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c | |||
| @@ -1,6 +1,9 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * fs/cifs/cifsencrypt.c | 2 | * fs/cifs/cifsencrypt.c |
| 3 | * | 3 | * |
| 4 | * Encryption and hashing operations relating to NTLM, NTLMv2. See MS-NLMP | ||
| 5 | * for more detailed information | ||
| 6 | * | ||
| 4 | * Copyright (C) International Business Machines Corp., 2005,2013 | 7 | * Copyright (C) International Business Machines Corp., 2005,2013 |
| 5 | * Author(s): Steve French (sfrench@us.ibm.com) | 8 | * Author(s): Steve French (sfrench@us.ibm.com) |
| 6 | * | 9 | * |
| @@ -515,7 +518,8 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, | |||
| 515 | __func__); | 518 | __func__); |
| 516 | return rc; | 519 | return rc; |
| 517 | } | 520 | } |
| 518 | } else if (ses->serverName) { | 521 | } else { |
| 522 | /* We use ses->serverName if no domain name available */ | ||
| 519 | len = strlen(ses->serverName); | 523 | len = strlen(ses->serverName); |
| 520 | 524 | ||
| 521 | server = kmalloc(2 + (len * 2), GFP_KERNEL); | 525 | server = kmalloc(2 + (len * 2), GFP_KERNEL); |
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index d3aa999ab785..f3bfe08e177b 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c | |||
| @@ -773,8 +773,7 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server) | |||
| 773 | 773 | ||
| 774 | length = atomic_dec_return(&tcpSesAllocCount); | 774 | length = atomic_dec_return(&tcpSesAllocCount); |
| 775 | if (length > 0) | 775 | if (length > 0) |
| 776 | mempool_resize(cifs_req_poolp, length + cifs_min_rcv, | 776 | mempool_resize(cifs_req_poolp, length + cifs_min_rcv); |
| 777 | GFP_KERNEL); | ||
| 778 | } | 777 | } |
| 779 | 778 | ||
| 780 | static int | 779 | static int |
| @@ -848,8 +847,7 @@ cifs_demultiplex_thread(void *p) | |||
| 848 | 847 | ||
| 849 | length = atomic_inc_return(&tcpSesAllocCount); | 848 | length = atomic_inc_return(&tcpSesAllocCount); |
| 850 | if (length > 1) | 849 | if (length > 1) |
| 851 | mempool_resize(cifs_req_poolp, length + cifs_min_rcv, | 850 | mempool_resize(cifs_req_poolp, length + cifs_min_rcv); |
| 852 | GFP_KERNEL); | ||
| 853 | 851 | ||
| 854 | set_freezable(); | 852 | set_freezable(); |
| 855 | while (server->tcpStatus != CifsExiting) { | 853 | while (server->tcpStatus != CifsExiting) { |
| @@ -1599,6 +1597,8 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, | |||
| 1599 | pr_warn("CIFS: username too long\n"); | 1597 | pr_warn("CIFS: username too long\n"); |
| 1600 | goto cifs_parse_mount_err; | 1598 | goto cifs_parse_mount_err; |
| 1601 | } | 1599 | } |
| 1600 | |||
| 1601 | kfree(vol->username); | ||
| 1602 | vol->username = kstrdup(string, GFP_KERNEL); | 1602 | vol->username = kstrdup(string, GFP_KERNEL); |
| 1603 | if (!vol->username) | 1603 | if (!vol->username) |
| 1604 | goto cifs_parse_mount_err; | 1604 | goto cifs_parse_mount_err; |
| @@ -1700,6 +1700,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, | |||
| 1700 | goto cifs_parse_mount_err; | 1700 | goto cifs_parse_mount_err; |
| 1701 | } | 1701 | } |
| 1702 | 1702 | ||
| 1703 | kfree(vol->domainname); | ||
| 1703 | vol->domainname = kstrdup(string, GFP_KERNEL); | 1704 | vol->domainname = kstrdup(string, GFP_KERNEL); |
| 1704 | if (!vol->domainname) { | 1705 | if (!vol->domainname) { |
| 1705 | pr_warn("CIFS: no memory for domainname\n"); | 1706 | pr_warn("CIFS: no memory for domainname\n"); |
| @@ -1731,6 +1732,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, | |||
| 1731 | } | 1732 | } |
| 1732 | 1733 | ||
| 1733 | if (strncasecmp(string, "default", 7) != 0) { | 1734 | if (strncasecmp(string, "default", 7) != 0) { |
| 1735 | kfree(vol->iocharset); | ||
| 1734 | vol->iocharset = kstrdup(string, | 1736 | vol->iocharset = kstrdup(string, |
| 1735 | GFP_KERNEL); | 1737 | GFP_KERNEL); |
| 1736 | if (!vol->iocharset) { | 1738 | if (!vol->iocharset) { |
| @@ -2913,8 +2915,7 @@ ip_rfc1001_connect(struct TCP_Server_Info *server) | |||
| 2913 | * calling name ends in null (byte 16) from old smb | 2915 | * calling name ends in null (byte 16) from old smb |
| 2914 | * convention. | 2916 | * convention. |
| 2915 | */ | 2917 | */ |
| 2916 | if (server->workstation_RFC1001_name && | 2918 | if (server->workstation_RFC1001_name[0] != 0) |
| 2917 | server->workstation_RFC1001_name[0] != 0) | ||
| 2918 | rfc1002mangle(ses_init_buf->trailer. | 2919 | rfc1002mangle(ses_init_buf->trailer. |
| 2919 | session_req.calling_name, | 2920 | session_req.calling_name, |
| 2920 | server->workstation_RFC1001_name, | 2921 | server->workstation_RFC1001_name, |
| @@ -3692,6 +3693,12 @@ CIFSTCon(const unsigned int xid, struct cifs_ses *ses, | |||
| 3692 | #endif /* CIFS_WEAK_PW_HASH */ | 3693 | #endif /* CIFS_WEAK_PW_HASH */ |
| 3693 | rc = SMBNTencrypt(tcon->password, ses->server->cryptkey, | 3694 | rc = SMBNTencrypt(tcon->password, ses->server->cryptkey, |
| 3694 | bcc_ptr, nls_codepage); | 3695 | bcc_ptr, nls_codepage); |
| 3696 | if (rc) { | ||
| 3697 | cifs_dbg(FYI, "%s Can't generate NTLM rsp. Error: %d\n", | ||
| 3698 | __func__, rc); | ||
| 3699 | cifs_buf_release(smb_buffer); | ||
| 3700 | return rc; | ||
| 3701 | } | ||
| 3695 | 3702 | ||
| 3696 | bcc_ptr += CIFS_AUTH_RESP_SIZE; | 3703 | bcc_ptr += CIFS_AUTH_RESP_SIZE; |
| 3697 | if (ses->capabilities & CAP_UNICODE) { | 3704 | if (ses->capabilities & CAP_UNICODE) { |
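Editor's note: several of the connect.c hunks fix the same leak: if a mount option such as username=, domain= or iocharset= appears more than once, the earlier kstrdup()'d copy was lost. Freeing the old pointer first is safe because kfree(NULL) is a no-op. A minimal kernel-style sketch of the pattern, with a hypothetical option struct:

#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/string.h>

struct mount_opts {
	char *username;		/* hypothetical; mirrors vol->username */
};

static int opt_set_username(struct mount_opts *vol, const char *string)
{
	kfree(vol->username);	/* drop any copy from an earlier occurrence */
	vol->username = kstrdup(string, GFP_KERNEL);
	return vol->username ? 0 : -ENOMEM;
}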
diff --git a/fs/cifs/file.c b/fs/cifs/file.c index a94b3e673182..ca30c391a894 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c | |||
| @@ -1823,6 +1823,7 @@ refind_writable: | |||
| 1823 | cifsFileInfo_put(inv_file); | 1823 | cifsFileInfo_put(inv_file); |
| 1824 | spin_lock(&cifs_file_list_lock); | 1824 | spin_lock(&cifs_file_list_lock); |
| 1825 | ++refind; | 1825 | ++refind; |
| 1826 | inv_file = NULL; | ||
| 1826 | goto refind_writable; | 1827 | goto refind_writable; |
| 1827 | } | 1828 | } |
| 1828 | } | 1829 | } |
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 2d4f37235ed0..3e126d7bb2ea 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c | |||
| @@ -771,6 +771,8 @@ cifs_get_inode_info(struct inode **inode, const char *full_path, | |||
| 771 | cifs_buf_release(srchinf->ntwrk_buf_start); | 771 | cifs_buf_release(srchinf->ntwrk_buf_start); |
| 772 | } | 772 | } |
| 773 | kfree(srchinf); | 773 | kfree(srchinf); |
| 774 | if (rc) | ||
| 775 | goto cgii_exit; | ||
| 774 | } else | 776 | } else |
| 775 | goto cgii_exit; | 777 | goto cgii_exit; |
| 776 | 778 | ||
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index 689f035915cf..22dfdf17d065 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c | |||
| @@ -322,7 +322,7 @@ smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr) | |||
| 322 | 322 | ||
| 323 | /* return pointer to beginning of data area, ie offset from SMB start */ | 323 | /* return pointer to beginning of data area, ie offset from SMB start */ |
| 324 | if ((*off != 0) && (*len != 0)) | 324 | if ((*off != 0) && (*len != 0)) |
| 325 | return hdr->ProtocolId + *off; | 325 | return (char *)(&hdr->ProtocolId[0]) + *off; |
| 326 | else | 326 | else |
| 327 | return NULL; | 327 | return NULL; |
| 328 | } | 328 | } |
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 96b5d40a2ece..eab05e1aa587 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c | |||
| @@ -684,7 +684,8 @@ smb2_clone_range(const unsigned int xid, | |||
| 684 | 684 | ||
| 685 | /* No need to change MaxChunks since already set to 1 */ | 685 | /* No need to change MaxChunks since already set to 1 */ |
| 686 | chunk_sizes_updated = true; | 686 | chunk_sizes_updated = true; |
| 687 | } | 687 | } else |
| 688 | goto cchunk_out; | ||
| 688 | } | 689 | } |
| 689 | 690 | ||
| 690 | cchunk_out: | 691 | cchunk_out: |
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 3417340bf89e..65cd7a84c8bc 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c | |||
| @@ -1218,7 +1218,7 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, | |||
| 1218 | struct smb2_ioctl_req *req; | 1218 | struct smb2_ioctl_req *req; |
| 1219 | struct smb2_ioctl_rsp *rsp; | 1219 | struct smb2_ioctl_rsp *rsp; |
| 1220 | struct TCP_Server_Info *server; | 1220 | struct TCP_Server_Info *server; |
| 1221 | struct cifs_ses *ses = tcon->ses; | 1221 | struct cifs_ses *ses; |
| 1222 | struct kvec iov[2]; | 1222 | struct kvec iov[2]; |
| 1223 | int resp_buftype; | 1223 | int resp_buftype; |
| 1224 | int num_iovecs; | 1224 | int num_iovecs; |
| @@ -1233,6 +1233,11 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, | |||
| 1233 | if (plen) | 1233 | if (plen) |
| 1234 | *plen = 0; | 1234 | *plen = 0; |
| 1235 | 1235 | ||
| 1236 | if (tcon) | ||
| 1237 | ses = tcon->ses; | ||
| 1238 | else | ||
| 1239 | return -EIO; | ||
| 1240 | |||
| 1236 | if (ses && (ses->server)) | 1241 | if (ses && (ses->server)) |
| 1237 | server = ses->server; | 1242 | server = ses->server; |
| 1238 | else | 1243 | else |
| @@ -1296,14 +1301,12 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, | |||
| 1296 | rsp = (struct smb2_ioctl_rsp *)iov[0].iov_base; | 1301 | rsp = (struct smb2_ioctl_rsp *)iov[0].iov_base; |
| 1297 | 1302 | ||
| 1298 | if ((rc != 0) && (rc != -EINVAL)) { | 1303 | if ((rc != 0) && (rc != -EINVAL)) { |
| 1299 | if (tcon) | 1304 | cifs_stats_fail_inc(tcon, SMB2_IOCTL_HE); |
| 1300 | cifs_stats_fail_inc(tcon, SMB2_IOCTL_HE); | ||
| 1301 | goto ioctl_exit; | 1305 | goto ioctl_exit; |
| 1302 | } else if (rc == -EINVAL) { | 1306 | } else if (rc == -EINVAL) { |
| 1303 | if ((opcode != FSCTL_SRV_COPYCHUNK_WRITE) && | 1307 | if ((opcode != FSCTL_SRV_COPYCHUNK_WRITE) && |
| 1304 | (opcode != FSCTL_SRV_COPYCHUNK)) { | 1308 | (opcode != FSCTL_SRV_COPYCHUNK)) { |
| 1305 | if (tcon) | 1309 | cifs_stats_fail_inc(tcon, SMB2_IOCTL_HE); |
| 1306 | cifs_stats_fail_inc(tcon, SMB2_IOCTL_HE); | ||
| 1307 | goto ioctl_exit; | 1310 | goto ioctl_exit; |
| 1308 | } | 1311 | } |
| 1309 | } | 1312 | } |
| @@ -1629,7 +1632,7 @@ SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, | |||
| 1629 | 1632 | ||
| 1630 | rc = SendReceive2(xid, ses, iov, 1, &resp_buftype, 0); | 1633 | rc = SendReceive2(xid, ses, iov, 1, &resp_buftype, 0); |
| 1631 | 1634 | ||
| 1632 | if ((rc != 0) && tcon) | 1635 | if (rc != 0) |
| 1633 | cifs_stats_fail_inc(tcon, SMB2_FLUSH_HE); | 1636 | cifs_stats_fail_inc(tcon, SMB2_FLUSH_HE); |
| 1634 | 1637 | ||
| 1635 | free_rsp_buf(resp_buftype, iov[0].iov_base); | 1638 | free_rsp_buf(resp_buftype, iov[0].iov_base); |
| @@ -2114,7 +2117,7 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon, | |||
| 2114 | struct kvec iov[2]; | 2117 | struct kvec iov[2]; |
| 2115 | int rc = 0; | 2118 | int rc = 0; |
| 2116 | int len; | 2119 | int len; |
| 2117 | int resp_buftype; | 2120 | int resp_buftype = CIFS_NO_BUFFER; |
| 2118 | unsigned char *bufptr; | 2121 | unsigned char *bufptr; |
| 2119 | struct TCP_Server_Info *server; | 2122 | struct TCP_Server_Info *server; |
| 2120 | struct cifs_ses *ses = tcon->ses; | 2123 | struct cifs_ses *ses = tcon->ses; |
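Editor's note: the SMB2_ioctl() change illustrates two defensive habits: never dereference a pointer in an initializer when it is only validated further down, and give out-parameters like resp_buftype a safe initial value so early error exits do not act on garbage. A small, self-contained sketch of the first point, with stand-in types:

#include <stdio.h>

struct ses { int id; };
struct tcon { struct ses *ses; };

static int ioctl_sketch(struct tcon *tcon)
{
	struct ses *ses;

	if (!tcon)		/* validate first ... */
		return -1;	/* -EIO in the kernel code */

	ses = tcon->ses;	/* ... then dereference */
	return ses ? 0 : -1;
}

int main(void)
{
	printf("%d\n", ioctl_sketch(NULL));	/* -1, no crash */
	return 0;
}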
diff --git a/fs/dcache.c b/fs/dcache.c index c71e3732e53b..d99736a63e3c 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
| @@ -2690,7 +2690,7 @@ static int __d_unalias(struct inode *inode, | |||
| 2690 | struct dentry *dentry, struct dentry *alias) | 2690 | struct dentry *dentry, struct dentry *alias) |
| 2691 | { | 2691 | { |
| 2692 | struct mutex *m1 = NULL, *m2 = NULL; | 2692 | struct mutex *m1 = NULL, *m2 = NULL; |
| 2693 | int ret = -EBUSY; | 2693 | int ret = -ESTALE; |
| 2694 | 2694 | ||
| 2695 | /* If alias and dentry share a parent, then no extra locks required */ | 2695 | /* If alias and dentry share a parent, then no extra locks required */ |
| 2696 | if (alias->d_parent == dentry->d_parent) | 2696 | if (alias->d_parent == dentry->d_parent) |
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 96400ab42d13..61e72d44cf94 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c | |||
| @@ -254,6 +254,9 @@ static struct dentry *start_creating(const char *name, struct dentry *parent) | |||
| 254 | 254 | ||
| 255 | pr_debug("debugfs: creating file '%s'\n",name); | 255 | pr_debug("debugfs: creating file '%s'\n",name); |
| 256 | 256 | ||
| 257 | if (IS_ERR(parent)) | ||
| 258 | return parent; | ||
| 259 | |||
| 257 | error = simple_pin_fs(&debug_fs_type, &debugfs_mount, | 260 | error = simple_pin_fs(&debug_fs_type, &debugfs_mount, |
| 258 | &debugfs_mount_count); | 261 | &debugfs_mount_count); |
| 259 | if (error) | 262 | if (error) |
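Editor's note: the debugfs guard is the usual ERR_PTR discipline: a parent dentry obtained from another debugfs call may itself be an encoded error and must be handed straight back rather than dereferenced. A hedged kernel-style fragment (not the real start_creating() body):

#include <linux/dcache.h>
#include <linux/err.h>

static struct dentry *start_creating_sketch(const char *name,
					    struct dentry *parent)
{
	if (IS_ERR(parent))
		return parent;	/* propagate the encoded error unchanged */

	/* ... pin the filesystem and look 'name' up under 'parent' ... */
	return parent;
}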
diff --git a/fs/direct-io.c b/fs/direct-io.c index e181b6b2e297..6fb00e3f1059 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c | |||
| @@ -37,7 +37,6 @@ | |||
| 37 | #include <linux/uio.h> | 37 | #include <linux/uio.h> |
| 38 | #include <linux/atomic.h> | 38 | #include <linux/atomic.h> |
| 39 | #include <linux/prefetch.h> | 39 | #include <linux/prefetch.h> |
| 40 | #include <linux/aio.h> | ||
| 41 | 40 | ||
| 42 | /* | 41 | /* |
| 43 | * How many user pages to map in one call to get_user_pages(). This determines | 42 | * How many user pages to map in one call to get_user_pages(). This determines |
| @@ -265,7 +264,7 @@ static ssize_t dio_complete(struct dio *dio, loff_t offset, ssize_t ret, | |||
| 265 | ret = err; | 264 | ret = err; |
| 266 | } | 265 | } |
| 267 | 266 | ||
| 268 | aio_complete(dio->iocb, ret, 0); | 267 | dio->iocb->ki_complete(dio->iocb, ret, 0); |
| 269 | } | 268 | } |
| 270 | 269 | ||
| 271 | kmem_cache_free(dio_cache, dio); | 270 | kmem_cache_free(dio_cache, dio); |
| @@ -1056,7 +1055,7 @@ static inline int drop_refcount(struct dio *dio) | |||
| 1056 | * operation. AIO can if it was a broken operation described above or | 1055 | * operation. AIO can if it was a broken operation described above or |
| 1057 | * in fact if all the bios race to complete before we get here. In | 1056 | * in fact if all the bios race to complete before we get here. In |
| 1058 | * that case dio_complete() translates the EIOCBQUEUED into the proper | 1057 | * that case dio_complete() translates the EIOCBQUEUED into the proper |
| 1059 | * return code that the caller will hand to aio_complete(). | 1058 | * return code that the caller will hand to ->complete(). |
| 1060 | * | 1059 | * |
| 1061 | * This is managed by the bio_lock instead of being an atomic_t so that | 1060 | * This is managed by the bio_lock instead of being an atomic_t so that |
| 1062 | * completion paths can drop their ref and use the remaining count to | 1061 | * completion paths can drop their ref and use the remaining count to |
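Editor's note: with aio_complete() gone, asynchronous completion goes through the callback the kiocb itself carries, exactly as the dio_complete() hunk shows. A one-function sketch of that call shape:

#include <linux/fs.h>

static void finish_async_io(struct kiocb *iocb, ssize_t ret)
{
	/* Hand the final byte count (or error) back to the iocb's owner. */
	iocb->ki_complete(iocb, ret, 0);
}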
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index fd39bad6f1bd..79675089443d 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c | |||
| @@ -31,7 +31,6 @@ | |||
| 31 | #include <linux/security.h> | 31 | #include <linux/security.h> |
| 32 | #include <linux/compat.h> | 32 | #include <linux/compat.h> |
| 33 | #include <linux/fs_stack.h> | 33 | #include <linux/fs_stack.h> |
| 34 | #include <linux/aio.h> | ||
| 35 | #include "ecryptfs_kernel.h" | 34 | #include "ecryptfs_kernel.h" |
| 36 | 35 | ||
| 37 | /** | 36 | /** |
| @@ -52,12 +51,6 @@ static ssize_t ecryptfs_read_update_atime(struct kiocb *iocb, | |||
| 52 | struct file *file = iocb->ki_filp; | 51 | struct file *file = iocb->ki_filp; |
| 53 | 52 | ||
| 54 | rc = generic_file_read_iter(iocb, to); | 53 | rc = generic_file_read_iter(iocb, to); |
| 55 | /* | ||
| 56 | * Even though this is a async interface, we need to wait | ||
| 57 | * for IO to finish to update atime | ||
| 58 | */ | ||
| 59 | if (-EIOCBQUEUED == rc) | ||
| 60 | rc = wait_on_sync_kiocb(iocb); | ||
| 61 | if (rc >= 0) { | 54 | if (rc >= 0) { |
| 62 | path = ecryptfs_dentry_to_lower_path(file->f_path.dentry); | 55 | path = ecryptfs_dentry_to_lower_path(file->f_path.dentry); |
| 63 | touch_atime(path); | 56 | touch_atime(path); |
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 6434bc000125..df9d6afbc5d5 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c | |||
| @@ -31,7 +31,7 @@ | |||
| 31 | #include <linux/mpage.h> | 31 | #include <linux/mpage.h> |
| 32 | #include <linux/fiemap.h> | 32 | #include <linux/fiemap.h> |
| 33 | #include <linux/namei.h> | 33 | #include <linux/namei.h> |
| 34 | #include <linux/aio.h> | 34 | #include <linux/uio.h> |
| 35 | #include "ext2.h" | 35 | #include "ext2.h" |
| 36 | #include "acl.h" | 36 | #include "acl.h" |
| 37 | #include "xattr.h" | 37 | #include "xattr.h" |
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 2c6ccc49ba27..db07ffbe7c85 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c | |||
| @@ -27,7 +27,7 @@ | |||
| 27 | #include <linux/writeback.h> | 27 | #include <linux/writeback.h> |
| 28 | #include <linux/mpage.h> | 28 | #include <linux/mpage.h> |
| 29 | #include <linux/namei.h> | 29 | #include <linux/namei.h> |
| 30 | #include <linux/aio.h> | 30 | #include <linux/uio.h> |
| 31 | #include "ext3.h" | 31 | #include "ext3.h" |
| 32 | #include "xattr.h" | 32 | #include "xattr.h" |
| 33 | #include "acl.h" | 33 | #include "acl.h" |
diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 33a09da16c9c..598abbbe6786 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c | |||
| @@ -23,9 +23,9 @@ | |||
| 23 | #include <linux/jbd2.h> | 23 | #include <linux/jbd2.h> |
| 24 | #include <linux/mount.h> | 24 | #include <linux/mount.h> |
| 25 | #include <linux/path.h> | 25 | #include <linux/path.h> |
| 26 | #include <linux/aio.h> | ||
| 27 | #include <linux/quotaops.h> | 26 | #include <linux/quotaops.h> |
| 28 | #include <linux/pagevec.h> | 27 | #include <linux/pagevec.h> |
| 28 | #include <linux/uio.h> | ||
| 29 | #include "ext4.h" | 29 | #include "ext4.h" |
| 30 | #include "ext4_jbd2.h" | 30 | #include "ext4_jbd2.h" |
| 31 | #include "xattr.h" | 31 | #include "xattr.h" |
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index 45fe924f82bc..740c7871c117 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c | |||
| @@ -20,9 +20,9 @@ | |||
| 20 | * (sct@redhat.com), 1993, 1998 | 20 | * (sct@redhat.com), 1993, 1998 |
| 21 | */ | 21 | */ |
| 22 | 22 | ||
| 23 | #include <linux/aio.h> | ||
| 24 | #include "ext4_jbd2.h" | 23 | #include "ext4_jbd2.h" |
| 25 | #include "truncate.h" | 24 | #include "truncate.h" |
| 25 | #include <linux/uio.h> | ||
| 26 | 26 | ||
| 27 | #include <trace/events/ext4.h> | 27 | #include <trace/events/ext4.h> |
| 28 | 28 | ||
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 5cb9a212b86f..a3f451370bef 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
| @@ -37,7 +37,6 @@ | |||
| 37 | #include <linux/printk.h> | 37 | #include <linux/printk.h> |
| 38 | #include <linux/slab.h> | 38 | #include <linux/slab.h> |
| 39 | #include <linux/ratelimit.h> | 39 | #include <linux/ratelimit.h> |
| 40 | #include <linux/aio.h> | ||
| 41 | #include <linux/bitops.h> | 40 | #include <linux/bitops.h> |
| 42 | 41 | ||
| 43 | #include "ext4_jbd2.h" | 42 | #include "ext4_jbd2.h" |
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index b24a2541a9ba..464984261e69 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c | |||
| @@ -18,7 +18,6 @@ | |||
| 18 | #include <linux/pagevec.h> | 18 | #include <linux/pagevec.h> |
| 19 | #include <linux/mpage.h> | 19 | #include <linux/mpage.h> |
| 20 | #include <linux/namei.h> | 20 | #include <linux/namei.h> |
| 21 | #include <linux/aio.h> | ||
| 22 | #include <linux/uio.h> | 21 | #include <linux/uio.h> |
| 23 | #include <linux/bio.h> | 22 | #include <linux/bio.h> |
| 24 | #include <linux/workqueue.h> | 23 | #include <linux/workqueue.h> |
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 985ed023a750..497f8515d205 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c | |||
| @@ -12,12 +12,12 @@ | |||
| 12 | #include <linux/f2fs_fs.h> | 12 | #include <linux/f2fs_fs.h> |
| 13 | #include <linux/buffer_head.h> | 13 | #include <linux/buffer_head.h> |
| 14 | #include <linux/mpage.h> | 14 | #include <linux/mpage.h> |
| 15 | #include <linux/aio.h> | ||
| 16 | #include <linux/writeback.h> | 15 | #include <linux/writeback.h> |
| 17 | #include <linux/backing-dev.h> | 16 | #include <linux/backing-dev.h> |
| 18 | #include <linux/blkdev.h> | 17 | #include <linux/blkdev.h> |
| 19 | #include <linux/bio.h> | 18 | #include <linux/bio.h> |
| 20 | #include <linux/prefetch.h> | 19 | #include <linux/prefetch.h> |
| 20 | #include <linux/uio.h> | ||
| 21 | 21 | ||
| 22 | #include "f2fs.h" | 22 | #include "f2fs.h" |
| 23 | #include "node.h" | 23 | #include "node.h" |
diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 497c7c5263c7..8521207de229 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c | |||
| @@ -19,7 +19,6 @@ | |||
| 19 | #include <linux/mpage.h> | 19 | #include <linux/mpage.h> |
| 20 | #include <linux/buffer_head.h> | 20 | #include <linux/buffer_head.h> |
| 21 | #include <linux/mount.h> | 21 | #include <linux/mount.h> |
| 22 | #include <linux/aio.h> | ||
| 23 | #include <linux/vfs.h> | 22 | #include <linux/vfs.h> |
| 24 | #include <linux/parser.h> | 23 | #include <linux/parser.h> |
| 25 | #include <linux/uio.h> | 24 | #include <linux/uio.h> |
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index e907052eeadb..32a8bbd7a9ad 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
| @@ -53,6 +53,18 @@ struct wb_writeback_work { | |||
| 53 | struct completion *done; /* set if the caller waits */ | 53 | struct completion *done; /* set if the caller waits */ |
| 54 | }; | 54 | }; |
| 55 | 55 | ||
| 56 | /* | ||
| 57 | * If an inode is constantly having its pages dirtied, but then the | ||
| 58 | * updates stop dirtytime_expire_interval seconds in the past, it's | ||
| 59 | * possible for the worst case time between when an inode has its | ||
| 60 | * timestamps updated and when they finally get written out to be two | ||
| 61 | * dirtytime_expire_intervals. We set the default to 12 hours (in | ||
| 62 | * seconds), which means most of the time inodes will have their | ||
| 63 | * timestamps written to disk after 12 hours, but in the worst case a | ||
| 64 | * few inodes might not have their timestamps updated for 24 hours. | ||
| 65 | */ | ||
| 66 | unsigned int dirtytime_expire_interval = 12 * 60 * 60; | ||
| 67 | |||
| 56 | /** | 68 | /** |
| 57 | * writeback_in_progress - determine whether there is writeback in progress | 69 | * writeback_in_progress - determine whether there is writeback in progress |
| 58 | * @bdi: the device's backing_dev_info structure. | 70 | * @bdi: the device's backing_dev_info structure. |
| @@ -275,8 +287,8 @@ static int move_expired_inodes(struct list_head *delaying_queue, | |||
| 275 | 287 | ||
| 276 | if ((flags & EXPIRE_DIRTY_ATIME) == 0) | 288 | if ((flags & EXPIRE_DIRTY_ATIME) == 0) |
| 277 | older_than_this = work->older_than_this; | 289 | older_than_this = work->older_than_this; |
| 278 | else if ((work->reason == WB_REASON_SYNC) == 0) { | 290 | else if (!work->for_sync) { |
| 279 | expire_time = jiffies - (HZ * 86400); | 291 | expire_time = jiffies - (dirtytime_expire_interval * HZ); |
| 280 | older_than_this = &expire_time; | 292 | older_than_this = &expire_time; |
| 281 | } | 293 | } |
| 282 | while (!list_empty(delaying_queue)) { | 294 | while (!list_empty(delaying_queue)) { |
| @@ -458,6 +470,7 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb, | |||
| 458 | */ | 470 | */ |
| 459 | redirty_tail(inode, wb); | 471 | redirty_tail(inode, wb); |
| 460 | } else if (inode->i_state & I_DIRTY_TIME) { | 472 | } else if (inode->i_state & I_DIRTY_TIME) { |
| 473 | inode->dirtied_when = jiffies; | ||
| 461 | list_move(&inode->i_wb_list, &wb->b_dirty_time); | 474 | list_move(&inode->i_wb_list, &wb->b_dirty_time); |
| 462 | } else { | 475 | } else { |
| 463 | /* The inode is clean. Remove from writeback lists. */ | 476 | /* The inode is clean. Remove from writeback lists. */ |
| @@ -505,12 +518,17 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
| 505 | spin_lock(&inode->i_lock); | 518 | spin_lock(&inode->i_lock); |
| 506 | 519 | ||
| 507 | dirty = inode->i_state & I_DIRTY; | 520 | dirty = inode->i_state & I_DIRTY; |
| 508 | if (((dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) && | 521 | if (inode->i_state & I_DIRTY_TIME) { |
| 509 | (inode->i_state & I_DIRTY_TIME)) || | 522 | if ((dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) || |
| 510 | (inode->i_state & I_DIRTY_TIME_EXPIRED)) { | 523 | unlikely(inode->i_state & I_DIRTY_TIME_EXPIRED) || |
| 511 | dirty |= I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED; | 524 | unlikely(time_after(jiffies, |
| 512 | trace_writeback_lazytime(inode); | 525 | (inode->dirtied_time_when + |
| 513 | } | 526 | dirtytime_expire_interval * HZ)))) { |
| 527 | dirty |= I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED; | ||
| 528 | trace_writeback_lazytime(inode); | ||
| 529 | } | ||
| 530 | } else | ||
| 531 | inode->i_state &= ~I_DIRTY_TIME_EXPIRED; | ||
| 514 | inode->i_state &= ~dirty; | 532 | inode->i_state &= ~dirty; |
| 515 | 533 | ||
| 516 | /* | 534 | /* |
| @@ -1131,6 +1149,56 @@ void wakeup_flusher_threads(long nr_pages, enum wb_reason reason) | |||
| 1131 | rcu_read_unlock(); | 1149 | rcu_read_unlock(); |
| 1132 | } | 1150 | } |
| 1133 | 1151 | ||
| 1152 | /* | ||
| 1153 | * Wake up bdi's periodically to make sure dirtytime inodes get | ||
| 1154 | * written back periodically. We deliberately do *not* check the | ||
| 1155 | * b_dirtytime list in wb_has_dirty_io(), since this would cause the | ||
| 1156 | * kernel to be constantly waking up once there are any dirtytime | ||
| 1157 | * inodes on the system. So instead we define a separate delayed work | ||
| 1158 | * function which gets called much more rarely. (By default, only | ||
| 1159 | * once every 12 hours.) | ||
| 1160 | * | ||
| 1161 | * If there is any other write activity going on in the file system, | ||
| 1162 | * this function won't be necessary. But if the only thing that has | ||
| 1163 | * happened on the file system is a dirtytime inode caused by an atime | ||
| 1164 | * update, we need this infrastructure below to make sure that inode | ||
| 1165 | * eventually gets pushed out to disk. | ||
| 1166 | */ | ||
| 1167 | static void wakeup_dirtytime_writeback(struct work_struct *w); | ||
| 1168 | static DECLARE_DELAYED_WORK(dirtytime_work, wakeup_dirtytime_writeback); | ||
| 1169 | |||
| 1170 | static void wakeup_dirtytime_writeback(struct work_struct *w) | ||
| 1171 | { | ||
| 1172 | struct backing_dev_info *bdi; | ||
| 1173 | |||
| 1174 | rcu_read_lock(); | ||
| 1175 | list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) { | ||
| 1176 | if (list_empty(&bdi->wb.b_dirty_time)) | ||
| 1177 | continue; | ||
| 1178 | bdi_wakeup_thread(bdi); | ||
| 1179 | } | ||
| 1180 | rcu_read_unlock(); | ||
| 1181 | schedule_delayed_work(&dirtytime_work, dirtytime_expire_interval * HZ); | ||
| 1182 | } | ||
| 1183 | |||
| 1184 | static int __init start_dirtytime_writeback(void) | ||
| 1185 | { | ||
| 1186 | schedule_delayed_work(&dirtytime_work, dirtytime_expire_interval * HZ); | ||
| 1187 | return 0; | ||
| 1188 | } | ||
| 1189 | __initcall(start_dirtytime_writeback); | ||
| 1190 | |||
| 1191 | int dirtytime_interval_handler(struct ctl_table *table, int write, | ||
| 1192 | void __user *buffer, size_t *lenp, loff_t *ppos) | ||
| 1193 | { | ||
| 1194 | int ret; | ||
| 1195 | |||
| 1196 | ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); | ||
| 1197 | if (ret == 0 && write) | ||
| 1198 | mod_delayed_work(system_wq, &dirtytime_work, 0); | ||
| 1199 | return ret; | ||
| 1200 | } | ||
| 1201 | |||
| 1134 | static noinline void block_dump___mark_inode_dirty(struct inode *inode) | 1202 | static noinline void block_dump___mark_inode_dirty(struct inode *inode) |
| 1135 | { | 1203 | { |
| 1136 | if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) { | 1204 | if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) { |
| @@ -1269,8 +1337,13 @@ void __mark_inode_dirty(struct inode *inode, int flags) | |||
| 1269 | } | 1337 | } |
| 1270 | 1338 | ||
| 1271 | inode->dirtied_when = jiffies; | 1339 | inode->dirtied_when = jiffies; |
| 1272 | list_move(&inode->i_wb_list, dirtytime ? | 1340 | if (dirtytime) |
| 1273 | &bdi->wb.b_dirty_time : &bdi->wb.b_dirty); | 1341 | inode->dirtied_time_when = jiffies; |
| 1342 | if (inode->i_state & (I_DIRTY_INODE | I_DIRTY_PAGES)) | ||
| 1343 | list_move(&inode->i_wb_list, &bdi->wb.b_dirty); | ||
| 1344 | else | ||
| 1345 | list_move(&inode->i_wb_list, | ||
| 1346 | &bdi->wb.b_dirty_time); | ||
| 1274 | spin_unlock(&bdi->wb.list_lock); | 1347 | spin_unlock(&bdi->wb.list_lock); |
| 1275 | trace_writeback_dirty_inode_enqueue(inode); | 1348 | trace_writeback_dirty_inode_enqueue(inode); |
| 1276 | 1349 | ||
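Editor's note: the new dirtytime machinery is a self-rearming delayed work item plus a sysctl that can kick it immediately. A trimmed kernel-style sketch of that shape; the names here are illustrative, and only schedule_delayed_work()/mod_delayed_work() and the 12-hour default come from the patch:

#include <linux/init.h>
#include <linux/jiffies.h>
#include <linux/workqueue.h>

static unsigned int expire_interval = 12 * 60 * 60;	/* seconds */

static void periodic_scan(struct work_struct *w);
static DECLARE_DELAYED_WORK(scan_work, periodic_scan);

static void periodic_scan(struct work_struct *w)
{
	/* ... walk the per-bdi b_dirty_time lists and wake their flushers ... */
	schedule_delayed_work(&scan_work, expire_interval * HZ);	/* re-arm */
}

static int __init start_scan(void)
{
	schedule_delayed_work(&scan_work, expire_interval * HZ);
	return 0;
}
__initcall(start_scan);

/* A sysctl handler would call mod_delayed_work(system_wq, &scan_work, 0)
 * after a successful write so a changed interval takes effect at once. */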
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c index 28d0c7abba1c..b3fa05032234 100644 --- a/fs/fuse/cuse.c +++ b/fs/fuse/cuse.c | |||
| @@ -38,7 +38,6 @@ | |||
| 38 | #include <linux/device.h> | 38 | #include <linux/device.h> |
| 39 | #include <linux/file.h> | 39 | #include <linux/file.h> |
| 40 | #include <linux/fs.h> | 40 | #include <linux/fs.h> |
| 41 | #include <linux/aio.h> | ||
| 42 | #include <linux/kdev_t.h> | 41 | #include <linux/kdev_t.h> |
| 43 | #include <linux/kthread.h> | 42 | #include <linux/kthread.h> |
| 44 | #include <linux/list.h> | 43 | #include <linux/list.h> |
| @@ -48,6 +47,7 @@ | |||
| 48 | #include <linux/slab.h> | 47 | #include <linux/slab.h> |
| 49 | #include <linux/stat.h> | 48 | #include <linux/stat.h> |
| 50 | #include <linux/module.h> | 49 | #include <linux/module.h> |
| 50 | #include <linux/uio.h> | ||
| 51 | 51 | ||
| 52 | #include "fuse_i.h" | 52 | #include "fuse_i.h" |
| 53 | 53 | ||
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index ed19a7d622fa..95a2797eef66 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c | |||
| @@ -19,7 +19,6 @@ | |||
| 19 | #include <linux/pipe_fs_i.h> | 19 | #include <linux/pipe_fs_i.h> |
| 20 | #include <linux/swap.h> | 20 | #include <linux/swap.h> |
| 21 | #include <linux/splice.h> | 21 | #include <linux/splice.h> |
| 22 | #include <linux/aio.h> | ||
| 23 | 22 | ||
| 24 | MODULE_ALIAS_MISCDEV(FUSE_MINOR); | 23 | MODULE_ALIAS_MISCDEV(FUSE_MINOR); |
| 25 | MODULE_ALIAS("devname:fuse"); | 24 | MODULE_ALIAS("devname:fuse"); |
| @@ -890,8 +889,8 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep) | |||
| 890 | 889 | ||
| 891 | newpage = buf->page; | 890 | newpage = buf->page; |
| 892 | 891 | ||
| 893 | if (WARN_ON(!PageUptodate(newpage))) | 892 | if (!PageUptodate(newpage)) |
| 894 | return -EIO; | 893 | SetPageUptodate(newpage); |
| 895 | 894 | ||
| 896 | ClearPageMappedToDisk(newpage); | 895 | ClearPageMappedToDisk(newpage); |
| 897 | 896 | ||
| @@ -1353,6 +1352,17 @@ static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file, | |||
| 1353 | return err; | 1352 | return err; |
| 1354 | } | 1353 | } |
| 1355 | 1354 | ||
| 1355 | static int fuse_dev_open(struct inode *inode, struct file *file) | ||
| 1356 | { | ||
| 1357 | /* | ||
| 1358 | * The fuse device's file's private_data is used to hold | ||
| 1359 | * the fuse_conn(ection) when it is mounted, and is used to | ||
| 1360 | * keep track of whether the file has been mounted already. | ||
| 1361 | */ | ||
| 1362 | file->private_data = NULL; | ||
| 1363 | return 0; | ||
| 1364 | } | ||
| 1365 | |||
| 1356 | static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov, | 1366 | static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov, |
| 1357 | unsigned long nr_segs, loff_t pos) | 1367 | unsigned long nr_segs, loff_t pos) |
| 1358 | { | 1368 | { |
| @@ -1797,6 +1807,9 @@ copy_finish: | |||
| 1797 | static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code, | 1807 | static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code, |
| 1798 | unsigned int size, struct fuse_copy_state *cs) | 1808 | unsigned int size, struct fuse_copy_state *cs) |
| 1799 | { | 1809 | { |
| 1810 | /* Don't try to move pages (yet) */ | ||
| 1811 | cs->move_pages = 0; | ||
| 1812 | |||
| 1800 | switch (code) { | 1813 | switch (code) { |
| 1801 | case FUSE_NOTIFY_POLL: | 1814 | case FUSE_NOTIFY_POLL: |
| 1802 | return fuse_notify_poll(fc, size, cs); | 1815 | return fuse_notify_poll(fc, size, cs); |
| @@ -2217,6 +2230,7 @@ static int fuse_dev_fasync(int fd, struct file *file, int on) | |||
| 2217 | 2230 | ||
| 2218 | const struct file_operations fuse_dev_operations = { | 2231 | const struct file_operations fuse_dev_operations = { |
| 2219 | .owner = THIS_MODULE, | 2232 | .owner = THIS_MODULE, |
| 2233 | .open = fuse_dev_open, | ||
| 2220 | .llseek = no_llseek, | 2234 | .llseek = no_llseek, |
| 2221 | .read = do_sync_read, | 2235 | .read = do_sync_read, |
| 2222 | .aio_read = fuse_dev_read, | 2236 | .aio_read = fuse_dev_read, |
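Editor's note: the new fuse_dev_open() makes the long-standing convention explicit: the device file's private_data stays NULL until a mount attaches a connection, so other paths can test it to tell "opened" from "mounted". A sketch of just that ->open():

#include <linux/fs.h>

static int fuse_dev_open_sketch(struct inode *inode, struct file *file)
{
	file->private_data = NULL;	/* no connection attached yet */
	return 0;
}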
diff --git a/fs/fuse/file.c b/fs/fuse/file.c index c01ec3bdcfd8..ff102cbf16ea 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c | |||
| @@ -15,8 +15,8 @@ | |||
| 15 | #include <linux/module.h> | 15 | #include <linux/module.h> |
| 16 | #include <linux/compat.h> | 16 | #include <linux/compat.h> |
| 17 | #include <linux/swap.h> | 17 | #include <linux/swap.h> |
| 18 | #include <linux/aio.h> | ||
| 19 | #include <linux/falloc.h> | 18 | #include <linux/falloc.h> |
| 19 | #include <linux/uio.h> | ||
| 20 | 20 | ||
| 21 | static const struct file_operations fuse_direct_io_file_operations; | 21 | static const struct file_operations fuse_direct_io_file_operations; |
| 22 | 22 | ||
| @@ -528,6 +528,17 @@ static void fuse_release_user_pages(struct fuse_req *req, int write) | |||
| 528 | } | 528 | } |
| 529 | } | 529 | } |
| 530 | 530 | ||
| 531 | static ssize_t fuse_get_res_by_io(struct fuse_io_priv *io) | ||
| 532 | { | ||
| 533 | if (io->err) | ||
| 534 | return io->err; | ||
| 535 | |||
| 536 | if (io->bytes >= 0 && io->write) | ||
| 537 | return -EIO; | ||
| 538 | |||
| 539 | return io->bytes < 0 ? io->size : io->bytes; | ||
| 540 | } | ||
| 541 | |||
| 531 | /** | 542 | /** |
| 532 | * In case of short read, the caller sets 'pos' to the position of | 543 | * In case of short read, the caller sets 'pos' to the position of |
| 533 | * actual end of fuse request in IO request. Otherwise, if bytes_requested | 544 | * actual end of fuse request in IO request. Otherwise, if bytes_requested |
| @@ -546,6 +557,7 @@ static void fuse_release_user_pages(struct fuse_req *req, int write) | |||
| 546 | */ | 557 | */ |
| 547 | static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos) | 558 | static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos) |
| 548 | { | 559 | { |
| 560 | bool is_sync = is_sync_kiocb(io->iocb); | ||
| 549 | int left; | 561 | int left; |
| 550 | 562 | ||
| 551 | spin_lock(&io->lock); | 563 | spin_lock(&io->lock); |
| @@ -555,30 +567,24 @@ static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos) | |||
| 555 | io->bytes = pos; | 567 | io->bytes = pos; |
| 556 | 568 | ||
| 557 | left = --io->reqs; | 569 | left = --io->reqs; |
| 570 | if (!left && is_sync) | ||
| 571 | complete(io->done); | ||
| 558 | spin_unlock(&io->lock); | 572 | spin_unlock(&io->lock); |
| 559 | 573 | ||
| 560 | if (!left) { | 574 | if (!left && !is_sync) { |
| 561 | long res; | 575 | ssize_t res = fuse_get_res_by_io(io); |
| 562 | 576 | ||
| 563 | if (io->err) | 577 | if (res >= 0) { |
| 564 | res = io->err; | 578 | struct inode *inode = file_inode(io->iocb->ki_filp); |
| 565 | else if (io->bytes >= 0 && io->write) | 579 | struct fuse_conn *fc = get_fuse_conn(inode); |
| 566 | res = -EIO; | 580 | struct fuse_inode *fi = get_fuse_inode(inode); |
| 567 | else { | ||
| 568 | res = io->bytes < 0 ? io->size : io->bytes; | ||
| 569 | 581 | ||
| 570 | if (!is_sync_kiocb(io->iocb)) { | 582 | spin_lock(&fc->lock); |
| 571 | struct inode *inode = file_inode(io->iocb->ki_filp); | 583 | fi->attr_version = ++fc->attr_version; |
| 572 | struct fuse_conn *fc = get_fuse_conn(inode); | 584 | spin_unlock(&fc->lock); |
| 573 | struct fuse_inode *fi = get_fuse_inode(inode); | ||
| 574 | |||
| 575 | spin_lock(&fc->lock); | ||
| 576 | fi->attr_version = ++fc->attr_version; | ||
| 577 | spin_unlock(&fc->lock); | ||
| 578 | } | ||
| 579 | } | 585 | } |
| 580 | 586 | ||
| 581 | aio_complete(io->iocb, res, 0); | 587 | io->iocb->ki_complete(io->iocb, res, 0); |
| 582 | kfree(io); | 588 | kfree(io); |
| 583 | } | 589 | } |
| 584 | } | 590 | } |
| @@ -2801,6 +2807,7 @@ static ssize_t | |||
| 2801 | fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, | 2807 | fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, |
| 2802 | loff_t offset) | 2808 | loff_t offset) |
| 2803 | { | 2809 | { |
| 2810 | DECLARE_COMPLETION_ONSTACK(wait); | ||
| 2804 | ssize_t ret = 0; | 2811 | ssize_t ret = 0; |
| 2805 | struct file *file = iocb->ki_filp; | 2812 | struct file *file = iocb->ki_filp; |
| 2806 | struct fuse_file *ff = file->private_data; | 2813 | struct fuse_file *ff = file->private_data; |
| @@ -2852,6 +2859,9 @@ fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, | |||
| 2852 | if (!is_sync_kiocb(iocb) && (offset + count > i_size) && rw == WRITE) | 2859 | if (!is_sync_kiocb(iocb) && (offset + count > i_size) && rw == WRITE) |
| 2853 | io->async = false; | 2860 | io->async = false; |
| 2854 | 2861 | ||
| 2862 | if (io->async && is_sync_kiocb(iocb)) | ||
| 2863 | io->done = &wait; | ||
| 2864 | |||
| 2855 | if (rw == WRITE) | 2865 | if (rw == WRITE) |
| 2856 | ret = __fuse_direct_write(io, iter, &pos); | 2866 | ret = __fuse_direct_write(io, iter, &pos); |
| 2857 | else | 2867 | else |
| @@ -2864,11 +2874,12 @@ fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, | |||
| 2864 | if (!is_sync_kiocb(iocb)) | 2874 | if (!is_sync_kiocb(iocb)) |
| 2865 | return -EIOCBQUEUED; | 2875 | return -EIOCBQUEUED; |
| 2866 | 2876 | ||
| 2867 | ret = wait_on_sync_kiocb(iocb); | 2877 | wait_for_completion(&wait); |
| 2868 | } else { | 2878 | ret = fuse_get_res_by_io(io); |
| 2869 | kfree(io); | ||
| 2870 | } | 2879 | } |
| 2871 | 2880 | ||
| 2881 | kfree(io); | ||
| 2882 | |||
| 2872 | if (rw == WRITE) { | 2883 | if (rw == WRITE) { |
| 2873 | if (ret > 0) | 2884 | if (ret > 0) |
| 2874 | fuse_write_update_size(inode, pos); | 2885 | fuse_write_update_size(inode, pos); |
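Editor's note: with wait_on_sync_kiocb() gone, the synchronous case of fuse's async direct IO blocks on an on-stack completion instead: the submitter stores &wait in the io context, and the last request to finish calls complete(). A condensed sketch of that handshake; locking and submission are elided, and the struct only loosely mirrors fuse_io_priv:

#include <linux/completion.h>

struct io_ctx {
	int reqs;			/* requests still in flight */
	struct completion *done;	/* non-NULL only for the sync case */
};

/* Called from each request's end-io path; the real code protects the
 * counter with a spinlock, omitted here for brevity. */
static void io_complete_one(struct io_ctx *io)
{
	if (--io->reqs == 0 && io->done)
		complete(io->done);
}

static void io_submit_and_wait(struct io_ctx *io)
{
	DECLARE_COMPLETION_ONSTACK(wait);

	io->done = &wait;
	/* ... submit io->reqs requests, each ending in io_complete_one() ... */
	wait_for_completion(&wait);
}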
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 1cdfb07c1376..7354dc142a50 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h | |||
| @@ -263,6 +263,7 @@ struct fuse_io_priv { | |||
| 263 | int err; | 263 | int err; |
| 264 | struct kiocb *iocb; | 264 | struct kiocb *iocb; |
| 265 | struct file *file; | 265 | struct file *file; |
| 266 | struct completion *done; | ||
| 266 | }; | 267 | }; |
| 267 | 268 | ||
| 268 | /** | 269 | /** |
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c index 7b3143064af1..1be3b061c05c 100644 --- a/fs/gfs2/acl.c +++ b/fs/gfs2/acl.c | |||
| @@ -110,11 +110,7 @@ int gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type) | |||
| 110 | error = __gfs2_xattr_set(inode, name, data, len, 0, GFS2_EATYPE_SYS); | 110 | error = __gfs2_xattr_set(inode, name, data, len, 0, GFS2_EATYPE_SYS); |
| 111 | if (error) | 111 | if (error) |
| 112 | goto out; | 112 | goto out; |
| 113 | 113 | set_cached_acl(inode, type, acl); | |
| 114 | if (acl) | ||
| 115 | set_cached_acl(inode, type, acl); | ||
| 116 | else | ||
| 117 | forget_cached_acl(inode, type); | ||
| 118 | out: | 114 | out: |
| 119 | kfree(data); | 115 | kfree(data); |
| 120 | return error; | 116 | return error; |
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 4ad4f94edebe..a6e6990aea39 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c | |||
| @@ -20,7 +20,7 @@ | |||
| 20 | #include <linux/swap.h> | 20 | #include <linux/swap.h> |
| 21 | #include <linux/gfs2_ondisk.h> | 21 | #include <linux/gfs2_ondisk.h> |
| 22 | #include <linux/backing-dev.h> | 22 | #include <linux/backing-dev.h> |
| 23 | #include <linux/aio.h> | 23 | #include <linux/uio.h> |
| 24 | #include <trace/events/writeback.h> | 24 | #include <trace/events/writeback.h> |
| 25 | 25 | ||
| 26 | #include "gfs2.h" | 26 | #include "gfs2.h" |
| @@ -671,12 +671,12 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, | |||
| 671 | 671 | ||
| 672 | if (alloc_required) { | 672 | if (alloc_required) { |
| 673 | struct gfs2_alloc_parms ap = { .aflags = 0, }; | 673 | struct gfs2_alloc_parms ap = { .aflags = 0, }; |
| 674 | error = gfs2_quota_lock_check(ip); | 674 | requested = data_blocks + ind_blocks; |
| 675 | ap.target = requested; | ||
| 676 | error = gfs2_quota_lock_check(ip, &ap); | ||
| 675 | if (error) | 677 | if (error) |
| 676 | goto out_unlock; | 678 | goto out_unlock; |
| 677 | 679 | ||
| 678 | requested = data_blocks + ind_blocks; | ||
| 679 | ap.target = requested; | ||
| 680 | error = gfs2_inplace_reserve(ip, &ap); | 680 | error = gfs2_inplace_reserve(ip, &ap); |
| 681 | if (error) | 681 | if (error) |
| 682 | goto out_qunlock; | 682 | goto out_qunlock; |
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index f0b945ab853e..61296ecbd0e2 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c | |||
| @@ -1224,7 +1224,7 @@ static int do_grow(struct inode *inode, u64 size) | |||
| 1224 | 1224 | ||
| 1225 | if (gfs2_is_stuffed(ip) && | 1225 | if (gfs2_is_stuffed(ip) && |
| 1226 | (size > (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)))) { | 1226 | (size > (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)))) { |
| 1227 | error = gfs2_quota_lock_check(ip); | 1227 | error = gfs2_quota_lock_check(ip, &ap); |
| 1228 | if (error) | 1228 | if (error) |
| 1229 | return error; | 1229 | return error; |
| 1230 | 1230 | ||
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 3e32bb8e2d7e..8ec43ab5babf 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c | |||
| @@ -25,7 +25,6 @@ | |||
| 25 | #include <asm/uaccess.h> | 25 | #include <asm/uaccess.h> |
| 26 | #include <linux/dlm.h> | 26 | #include <linux/dlm.h> |
| 27 | #include <linux/dlm_plock.h> | 27 | #include <linux/dlm_plock.h> |
| 28 | #include <linux/aio.h> | ||
| 29 | #include <linux/delay.h> | 28 | #include <linux/delay.h> |
| 30 | 29 | ||
| 31 | #include "gfs2.h" | 30 | #include "gfs2.h" |
| @@ -429,11 +428,11 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
| 429 | if (ret) | 428 | if (ret) |
| 430 | goto out_unlock; | 429 | goto out_unlock; |
| 431 | 430 | ||
| 432 | ret = gfs2_quota_lock_check(ip); | ||
| 433 | if (ret) | ||
| 434 | goto out_unlock; | ||
| 435 | gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks); | 431 | gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks); |
| 436 | ap.target = data_blocks + ind_blocks; | 432 | ap.target = data_blocks + ind_blocks; |
| 433 | ret = gfs2_quota_lock_check(ip, &ap); | ||
| 434 | if (ret) | ||
| 435 | goto out_unlock; | ||
| 437 | ret = gfs2_inplace_reserve(ip, &ap); | 436 | ret = gfs2_inplace_reserve(ip, &ap); |
| 438 | if (ret) | 437 | if (ret) |
| 439 | goto out_quota_unlock; | 438 | goto out_quota_unlock; |
| @@ -765,22 +764,30 @@ out: | |||
| 765 | brelse(dibh); | 764 | brelse(dibh); |
| 766 | return error; | 765 | return error; |
| 767 | } | 766 | } |
| 768 | 767 | /** | |
| 769 | static void calc_max_reserv(struct gfs2_inode *ip, loff_t max, loff_t *len, | 768 | * calc_max_reserv() - Reverse of write_calc_reserv. Given a number of |
| 770 | unsigned int *data_blocks, unsigned int *ind_blocks) | 769 | * blocks, determine how many bytes can be written. |
| 770 | * @ip: The inode in question. | ||
| 771 | * @len: Max cap of bytes. What we return in *len must be <= this. | ||
| 772 | * @data_blocks: Compute and return the number of data blocks needed | ||
| 773 | * @ind_blocks: Compute and return the number of indirect blocks needed | ||
| 774 | * @max_blocks: The total blocks available to work with. | ||
| 775 | * | ||
| 776 | * Returns: void, but @len, @data_blocks and @ind_blocks are filled in. | ||
| 777 | */ | ||
| 778 | static void calc_max_reserv(struct gfs2_inode *ip, loff_t *len, | ||
| 779 | unsigned int *data_blocks, unsigned int *ind_blocks, | ||
| 780 | unsigned int max_blocks) | ||
| 771 | { | 781 | { |
| 782 | loff_t max = *len; | ||
| 772 | const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 783 | const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
| 773 | unsigned int max_blocks = ip->i_rgd->rd_free_clone; | ||
| 774 | unsigned int tmp, max_data = max_blocks - 3 * (sdp->sd_max_height - 1); | 784 | unsigned int tmp, max_data = max_blocks - 3 * (sdp->sd_max_height - 1); |
| 775 | 785 | ||
| 776 | for (tmp = max_data; tmp > sdp->sd_diptrs;) { | 786 | for (tmp = max_data; tmp > sdp->sd_diptrs;) { |
| 777 | tmp = DIV_ROUND_UP(tmp, sdp->sd_inptrs); | 787 | tmp = DIV_ROUND_UP(tmp, sdp->sd_inptrs); |
| 778 | max_data -= tmp; | 788 | max_data -= tmp; |
| 779 | } | 789 | } |
| 780 | /* This calculation isn't the exact reverse of gfs2_write_calc_reserve, | 790 | |
| 781 | so it might end up with fewer data blocks */ | ||
| 782 | if (max_data <= *data_blocks) | ||
| 783 | return; | ||
| 784 | *data_blocks = max_data; | 791 | *data_blocks = max_data; |
| 785 | *ind_blocks = max_blocks - max_data; | 792 | *ind_blocks = max_blocks - max_data; |
| 786 | *len = ((loff_t)max_data - 3) << sdp->sd_sb.sb_bsize_shift; | 793 | *len = ((loff_t)max_data - 3) << sdp->sd_sb.sb_bsize_shift; |
| @@ -797,7 +804,7 @@ static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t | |||
| 797 | struct gfs2_inode *ip = GFS2_I(inode); | 804 | struct gfs2_inode *ip = GFS2_I(inode); |
| 798 | struct gfs2_alloc_parms ap = { .aflags = 0, }; | 805 | struct gfs2_alloc_parms ap = { .aflags = 0, }; |
| 799 | unsigned int data_blocks = 0, ind_blocks = 0, rblocks; | 806 | unsigned int data_blocks = 0, ind_blocks = 0, rblocks; |
| 800 | loff_t bytes, max_bytes; | 807 | loff_t bytes, max_bytes, max_blks = UINT_MAX; |
| 801 | int error; | 808 | int error; |
| 802 | const loff_t pos = offset; | 809 | const loff_t pos = offset; |
| 803 | const loff_t count = len; | 810 | const loff_t count = len; |
| @@ -819,6 +826,9 @@ static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t | |||
| 819 | 826 | ||
| 820 | gfs2_size_hint(file, offset, len); | 827 | gfs2_size_hint(file, offset, len); |
| 821 | 828 | ||
| 829 | gfs2_write_calc_reserv(ip, PAGE_SIZE, &data_blocks, &ind_blocks); | ||
| 830 | ap.min_target = data_blocks + ind_blocks; | ||
| 831 | |||
| 822 | while (len > 0) { | 832 | while (len > 0) { |
| 823 | if (len < bytes) | 833 | if (len < bytes) |
| 824 | bytes = len; | 834 | bytes = len; |
| @@ -827,27 +837,41 @@ static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t | |||
| 827 | offset += bytes; | 837 | offset += bytes; |
| 828 | continue; | 838 | continue; |
| 829 | } | 839 | } |
| 830 | error = gfs2_quota_lock_check(ip); | 840 | |
| 841 | /* We need to determine how many bytes we can actually | ||
| 842 | * fallocate without exceeding quota or going over the | ||
| 843 | * end of the fs. We start off optimistically by assuming | ||
| 844 | * we can write max_bytes */ | ||
| 845 | max_bytes = (len > max_chunk_size) ? max_chunk_size : len; | ||
| 846 | |||
| 847 | /* Since max_bytes is most likely a theoretical max, we | ||
| 848 | * calculate a more realistic 'bytes' to serve as a good | ||
| 849 | * starting point for the number of bytes we may be able | ||
| 850 | * to write */ | ||
| 851 | gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks); | ||
| 852 | ap.target = data_blocks + ind_blocks; | ||
| 853 | |||
| 854 | error = gfs2_quota_lock_check(ip, &ap); | ||
| 831 | if (error) | 855 | if (error) |
| 832 | return error; | 856 | return error; |
| 833 | retry: | 857 | /* ap.allowed tells us how many blocks quota will allow |
| 834 | gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks); | 858 | * us to write. Check if this reduces max_blks */ |
| 859 | if (ap.allowed && ap.allowed < max_blks) | ||
| 860 | max_blks = ap.allowed; | ||
| 835 | 861 | ||
| 836 | ap.target = data_blocks + ind_blocks; | ||
| 837 | error = gfs2_inplace_reserve(ip, &ap); | 862 | error = gfs2_inplace_reserve(ip, &ap); |
| 838 | if (error) { | 863 | if (error) |
| 839 | if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) { | ||
| 840 | bytes >>= 1; | ||
| 841 | bytes &= bsize_mask; | ||
| 842 | if (bytes == 0) | ||
| 843 | bytes = sdp->sd_sb.sb_bsize; | ||
| 844 | goto retry; | ||
| 845 | } | ||
| 846 | goto out_qunlock; | 864 | goto out_qunlock; |
| 847 | } | 865 | |
| 848 | max_bytes = bytes; | 866 | /* check if the selected rgrp limits our max_blks further */ |
| 849 | calc_max_reserv(ip, (len > max_chunk_size)? max_chunk_size: len, | 867 | if (ap.allowed && ap.allowed < max_blks) |
| 850 | &max_bytes, &data_blocks, &ind_blocks); | 868 | max_blks = ap.allowed; |
| 869 | |||
| 870 | /* Almost done. Calculate bytes that can be written using | ||
| 871 | * max_blks. We also recompute max_bytes, data_blocks and | ||
| 872 | * ind_blocks */ | ||
| 873 | calc_max_reserv(ip, &max_bytes, &data_blocks, | ||
| 874 | &ind_blocks, max_blks); | ||
| 851 | 875 | ||
| 852 | rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA + | 876 | rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA + |
| 853 | RES_RG_HDR + gfs2_rg_blocks(ip, data_blocks + ind_blocks); | 877 | RES_RG_HDR + gfs2_rg_blocks(ip, data_blocks + ind_blocks); |
| @@ -931,6 +955,22 @@ out_uninit: | |||
| 931 | return ret; | 955 | return ret; |
| 932 | } | 956 | } |
| 933 | 957 | ||
| 958 | static ssize_t gfs2_file_splice_write(struct pipe_inode_info *pipe, | ||
| 959 | struct file *out, loff_t *ppos, | ||
| 960 | size_t len, unsigned int flags) | ||
| 961 | { | ||
| 962 | int error; | ||
| 963 | struct gfs2_inode *ip = GFS2_I(out->f_mapping->host); | ||
| 964 | |||
| 965 | error = gfs2_rs_alloc(ip); | ||
| 966 | if (error) | ||
| 967 | return (ssize_t)error; | ||
| 968 | |||
| 969 | gfs2_size_hint(out, *ppos, len); | ||
| 970 | |||
| 971 | return iter_file_splice_write(pipe, out, ppos, len, flags); | ||
| 972 | } | ||
| 973 | |||
| 934 | #ifdef CONFIG_GFS2_FS_LOCKING_DLM | 974 | #ifdef CONFIG_GFS2_FS_LOCKING_DLM |
| 935 | 975 | ||
| 936 | /** | 976 | /** |
| @@ -1077,7 +1117,7 @@ const struct file_operations gfs2_file_fops = { | |||
| 1077 | .lock = gfs2_lock, | 1117 | .lock = gfs2_lock, |
| 1078 | .flock = gfs2_flock, | 1118 | .flock = gfs2_flock, |
| 1079 | .splice_read = generic_file_splice_read, | 1119 | .splice_read = generic_file_splice_read, |
| 1080 | .splice_write = iter_file_splice_write, | 1120 | .splice_write = gfs2_file_splice_write, |
| 1081 | .setlease = simple_nosetlease, | 1121 | .setlease = simple_nosetlease, |
| 1082 | .fallocate = gfs2_fallocate, | 1122 | .fallocate = gfs2_fallocate, |
| 1083 | }; | 1123 | }; |
| @@ -1107,7 +1147,7 @@ const struct file_operations gfs2_file_fops_nolock = { | |||
| 1107 | .release = gfs2_release, | 1147 | .release = gfs2_release, |
| 1108 | .fsync = gfs2_fsync, | 1148 | .fsync = gfs2_fsync, |
| 1109 | .splice_read = generic_file_splice_read, | 1149 | .splice_read = generic_file_splice_read, |
| 1110 | .splice_write = iter_file_splice_write, | 1150 | .splice_write = gfs2_file_splice_write, |
| 1111 | .setlease = generic_setlease, | 1151 | .setlease = generic_setlease, |
| 1112 | .fallocate = gfs2_fallocate, | 1152 | .fallocate = gfs2_fallocate, |
| 1113 | }; | 1153 | }; |
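
The reworked __gfs2_fallocate() above drops the halve-and-retry loop: it sets ap.min_target once, lets gfs2_quota_lock_check() and gfs2_inplace_reserve() report a block budget through ap.allowed, clamps max_blks to the smaller of the two, and has calc_max_reserv() translate that budget back into a byte count and a data/indirect split. The standalone sketch below reproduces only the arithmetic of that reverse calculation; the geometry numbers (inptrs, diptrs, height, block size) are invented for the example rather than read from a gfs2 superblock, and the final clamp to the requested length is assumed from the surrounding code:

    /* Sketch of the calc_max_reserv() arithmetic: split a block budget
     * into data and indirect blocks and report the writable byte count.
     * The geometry below is invented for the example, not read from a
     * real gfs2 superblock. */
    #include <stdio.h>
    #include <stdint.h>

    #define DIV_ROUND_UP(n, d)  (((n) + (d) - 1) / (d))

    struct geometry {
        unsigned int inptrs;       /* pointers per indirect block */
        unsigned int diptrs;       /* pointers held in the dinode */
        unsigned int max_height;   /* height of the metadata tree */
        unsigned int bsize_shift;  /* log2(block size) */
    };

    static void calc_max_reserv(const struct geometry *g, uint64_t *len,
                                unsigned int *data_blocks,
                                unsigned int *ind_blocks,
                                unsigned int max_blocks)
    {
        uint64_t max = *len;
        unsigned int tmp, max_data = max_blocks - 3 * (g->max_height - 1);

        /* Peel off the indirect blocks needed to map max_data blocks. */
        for (tmp = max_data; tmp > g->diptrs;) {
            tmp = DIV_ROUND_UP(tmp, g->inptrs);
            max_data -= tmp;
        }

        *data_blocks = max_data;
        *ind_blocks = max_blocks - max_data;
        *len = ((uint64_t)max_data - 3) << g->bsize_shift;
        if (*len > max)             /* never promise more than asked for */
            *len = max;
    }

    int main(void)
    {
        struct geometry g = {
            .inptrs = 509, .diptrs = 483,
            .max_height = 5, .bsize_shift = 12,   /* 4 KiB blocks */
        };
        uint64_t len = 1ULL << 30;                /* caller wants 1 GiB */
        unsigned int data = 0, ind = 0;
        unsigned int budget = 100000;             /* blocks allowed */

        calc_max_reserv(&g, &len, &data, &ind, budget);
        printf("budget %u blocks -> %u data + %u indirect, %llu bytes\n",
               budget, data, ind, (unsigned long long)len);
        return 0;
    }
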
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index f42dffba056a..0fa8062f85a7 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c | |||
| @@ -2047,34 +2047,41 @@ static const struct file_operations gfs2_sbstats_fops = { | |||
| 2047 | 2047 | ||
| 2048 | int gfs2_create_debugfs_file(struct gfs2_sbd *sdp) | 2048 | int gfs2_create_debugfs_file(struct gfs2_sbd *sdp) |
| 2049 | { | 2049 | { |
| 2050 | sdp->debugfs_dir = debugfs_create_dir(sdp->sd_table_name, gfs2_root); | 2050 | struct dentry *dent; |
| 2051 | if (!sdp->debugfs_dir) | 2051 | |
| 2052 | return -ENOMEM; | 2052 | dent = debugfs_create_dir(sdp->sd_table_name, gfs2_root); |
| 2053 | sdp->debugfs_dentry_glocks = debugfs_create_file("glocks", | 2053 | if (IS_ERR_OR_NULL(dent)) |
| 2054 | S_IFREG | S_IRUGO, | 2054 | goto fail; |
| 2055 | sdp->debugfs_dir, sdp, | 2055 | sdp->debugfs_dir = dent; |
| 2056 | &gfs2_glocks_fops); | 2056 | |
| 2057 | if (!sdp->debugfs_dentry_glocks) | 2057 | dent = debugfs_create_file("glocks", |
| 2058 | S_IFREG | S_IRUGO, | ||
| 2059 | sdp->debugfs_dir, sdp, | ||
| 2060 | &gfs2_glocks_fops); | ||
| 2061 | if (IS_ERR_OR_NULL(dent)) | ||
| 2058 | goto fail; | 2062 | goto fail; |
| 2063 | sdp->debugfs_dentry_glocks = dent; | ||
| 2059 | 2064 | ||
| 2060 | sdp->debugfs_dentry_glstats = debugfs_create_file("glstats", | 2065 | dent = debugfs_create_file("glstats", |
| 2061 | S_IFREG | S_IRUGO, | 2066 | S_IFREG | S_IRUGO, |
| 2062 | sdp->debugfs_dir, sdp, | 2067 | sdp->debugfs_dir, sdp, |
| 2063 | &gfs2_glstats_fops); | 2068 | &gfs2_glstats_fops); |
| 2064 | if (!sdp->debugfs_dentry_glstats) | 2069 | if (IS_ERR_OR_NULL(dent)) |
| 2065 | goto fail; | 2070 | goto fail; |
| 2071 | sdp->debugfs_dentry_glstats = dent; | ||
| 2066 | 2072 | ||
| 2067 | sdp->debugfs_dentry_sbstats = debugfs_create_file("sbstats", | 2073 | dent = debugfs_create_file("sbstats", |
| 2068 | S_IFREG | S_IRUGO, | 2074 | S_IFREG | S_IRUGO, |
| 2069 | sdp->debugfs_dir, sdp, | 2075 | sdp->debugfs_dir, sdp, |
| 2070 | &gfs2_sbstats_fops); | 2076 | &gfs2_sbstats_fops); |
| 2071 | if (!sdp->debugfs_dentry_sbstats) | 2077 | if (IS_ERR_OR_NULL(dent)) |
| 2072 | goto fail; | 2078 | goto fail; |
| 2079 | sdp->debugfs_dentry_sbstats = dent; | ||
| 2073 | 2080 | ||
| 2074 | return 0; | 2081 | return 0; |
| 2075 | fail: | 2082 | fail: |
| 2076 | gfs2_delete_debugfs_file(sdp); | 2083 | gfs2_delete_debugfs_file(sdp); |
| 2077 | return -ENOMEM; | 2084 | return dent ? PTR_ERR(dent) : -ENOMEM; |
| 2078 | } | 2085 | } |
| 2079 | 2086 | ||
| 2080 | void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp) | 2087 | void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp) |
| @@ -2100,6 +2107,8 @@ void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp) | |||
| 2100 | int gfs2_register_debugfs(void) | 2107 | int gfs2_register_debugfs(void) |
| 2101 | { | 2108 | { |
| 2102 | gfs2_root = debugfs_create_dir("gfs2", NULL); | 2109 | gfs2_root = debugfs_create_dir("gfs2", NULL); |
| 2110 | if (IS_ERR(gfs2_root)) | ||
| 2111 | return PTR_ERR(gfs2_root); | ||
| 2103 | return gfs2_root ? 0 : -ENOMEM; | 2112 | return gfs2_root ? 0 : -ENOMEM; |
| 2104 | } | 2113 | } |
| 2105 | 2114 | ||
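
The debugfs rework above stops treating NULL as the only failure mode: debugfs_create_dir()/debugfs_create_file() may also hand back an ERR_PTR-encoded errno, so each dentry is now checked with IS_ERR_OR_NULL() and the error, when present, is propagated via PTR_ERR(). The fragment below models just that error-pointer convention in ordinary C; ERR_PTR/IS_ERR/IS_ERR_OR_NULL are small local reimplementations for illustration, not the kernel's <linux/err.h>, and fake_create_dir() is a made-up stand-in:

    /* Minimal userspace model of the error-pointer convention behind the
     * IS_ERR_OR_NULL() checks: errors live in the top 4095 values of the
     * pointer range, NULL means the facility is not built in. */
    #include <stdio.h>
    #include <errno.h>
    #include <stdint.h>

    #define MAX_ERRNO 4095

    static void *ERR_PTR(long err)      { return (void *)(intptr_t)err; }
    static long  PTR_ERR(const void *p) { return (long)(intptr_t)p; }

    static int IS_ERR(const void *p)
    {
        return (uintptr_t)p >= (uintptr_t)-MAX_ERRNO;
    }

    static int IS_ERR_OR_NULL(const void *p)
    {
        return !p || IS_ERR(p);
    }

    static void *fake_create_dir(int mode)
    {
        static int dir;

        switch (mode) {
        case 0:  return &dir;              /* success */
        case 1:  return NULL;              /* facility compiled out */
        default: return ERR_PTR(-ENOMEM);  /* allocation failure */
        }
    }

    int main(void)
    {
        for (int mode = 0; mode < 3; mode++) {
            void *dent = fake_create_dir(mode);

            if (IS_ERR_OR_NULL(dent)) {
                /* Mirrors the new failure path: report PTR_ERR() when
                 * there is one, fall back to -ENOMEM otherwise. */
                long err = dent ? PTR_ERR(dent) : -ENOMEM;
                printf("mode %d: failed with %ld\n", mode, err);
                continue;
            }
            printf("mode %d: got dentry %p\n", mode, dent);
        }
        return 0;
    }
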
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 7a2dbbc0d634..58b75abf6ab2 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h | |||
| @@ -301,8 +301,10 @@ struct gfs2_blkreserv { | |||
| 301 | * to the allocation code. | 301 | * to the allocation code. |
| 302 | */ | 302 | */ |
| 303 | struct gfs2_alloc_parms { | 303 | struct gfs2_alloc_parms { |
| 304 | u32 target; | 304 | u64 target; |
| 305 | u32 min_target; | ||
| 305 | u32 aflags; | 306 | u32 aflags; |
| 307 | u64 allowed; | ||
| 306 | }; | 308 | }; |
| 307 | 309 | ||
| 308 | enum { | 310 | enum { |
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 73c72253faac..08bc84d7e768 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c | |||
| @@ -382,7 +382,7 @@ static int alloc_dinode(struct gfs2_inode *ip, u32 flags, unsigned *dblocks) | |||
| 382 | struct gfs2_alloc_parms ap = { .target = *dblocks, .aflags = flags, }; | 382 | struct gfs2_alloc_parms ap = { .target = *dblocks, .aflags = flags, }; |
| 383 | int error; | 383 | int error; |
| 384 | 384 | ||
| 385 | error = gfs2_quota_lock_check(ip); | 385 | error = gfs2_quota_lock_check(ip, &ap); |
| 386 | if (error) | 386 | if (error) |
| 387 | goto out; | 387 | goto out; |
| 388 | 388 | ||
| @@ -525,7 +525,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name, | |||
| 525 | int error; | 525 | int error; |
| 526 | 526 | ||
| 527 | if (da->nr_blocks) { | 527 | if (da->nr_blocks) { |
| 528 | error = gfs2_quota_lock_check(dip); | 528 | error = gfs2_quota_lock_check(dip, &ap); |
| 529 | if (error) | 529 | if (error) |
| 530 | goto fail_quota_locks; | 530 | goto fail_quota_locks; |
| 531 | 531 | ||
| @@ -953,7 +953,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, | |||
| 953 | 953 | ||
| 954 | if (da.nr_blocks) { | 954 | if (da.nr_blocks) { |
| 955 | struct gfs2_alloc_parms ap = { .target = da.nr_blocks, }; | 955 | struct gfs2_alloc_parms ap = { .target = da.nr_blocks, }; |
| 956 | error = gfs2_quota_lock_check(dip); | 956 | error = gfs2_quota_lock_check(dip, &ap); |
| 957 | if (error) | 957 | if (error) |
| 958 | goto out_gunlock; | 958 | goto out_gunlock; |
| 959 | 959 | ||
| @@ -1470,7 +1470,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, | |||
| 1470 | 1470 | ||
| 1471 | if (da.nr_blocks) { | 1471 | if (da.nr_blocks) { |
| 1472 | struct gfs2_alloc_parms ap = { .target = da.nr_blocks, }; | 1472 | struct gfs2_alloc_parms ap = { .target = da.nr_blocks, }; |
| 1473 | error = gfs2_quota_lock_check(ndip); | 1473 | error = gfs2_quota_lock_check(ndip, &ap); |
| 1474 | if (error) | 1474 | if (error) |
| 1475 | goto out_gunlock; | 1475 | goto out_gunlock; |
| 1476 | 1476 | ||
| @@ -1669,6 +1669,7 @@ static int setattr_chown(struct inode *inode, struct iattr *attr) | |||
| 1669 | kuid_t ouid, nuid; | 1669 | kuid_t ouid, nuid; |
| 1670 | kgid_t ogid, ngid; | 1670 | kgid_t ogid, ngid; |
| 1671 | int error; | 1671 | int error; |
| 1672 | struct gfs2_alloc_parms ap; | ||
| 1672 | 1673 | ||
| 1673 | ouid = inode->i_uid; | 1674 | ouid = inode->i_uid; |
| 1674 | ogid = inode->i_gid; | 1675 | ogid = inode->i_gid; |
| @@ -1696,9 +1697,11 @@ static int setattr_chown(struct inode *inode, struct iattr *attr) | |||
| 1696 | if (error) | 1697 | if (error) |
| 1697 | goto out; | 1698 | goto out; |
| 1698 | 1699 | ||
| 1700 | ap.target = gfs2_get_inode_blocks(&ip->i_inode); | ||
| 1701 | |||
| 1699 | if (!uid_eq(ouid, NO_UID_QUOTA_CHANGE) || | 1702 | if (!uid_eq(ouid, NO_UID_QUOTA_CHANGE) || |
| 1700 | !gid_eq(ogid, NO_GID_QUOTA_CHANGE)) { | 1703 | !gid_eq(ogid, NO_GID_QUOTA_CHANGE)) { |
| 1701 | error = gfs2_quota_check(ip, nuid, ngid); | 1704 | error = gfs2_quota_check(ip, nuid, ngid, &ap); |
| 1702 | if (error) | 1705 | if (error) |
| 1703 | goto out_gunlock_q; | 1706 | goto out_gunlock_q; |
| 1704 | } | 1707 | } |
| @@ -1713,9 +1716,8 @@ static int setattr_chown(struct inode *inode, struct iattr *attr) | |||
| 1713 | 1716 | ||
| 1714 | if (!uid_eq(ouid, NO_UID_QUOTA_CHANGE) || | 1717 | if (!uid_eq(ouid, NO_UID_QUOTA_CHANGE) || |
| 1715 | !gid_eq(ogid, NO_GID_QUOTA_CHANGE)) { | 1718 | !gid_eq(ogid, NO_GID_QUOTA_CHANGE)) { |
| 1716 | u64 blocks = gfs2_get_inode_blocks(&ip->i_inode); | 1719 | gfs2_quota_change(ip, -ap.target, ouid, ogid); |
| 1717 | gfs2_quota_change(ip, -blocks, ouid, ogid); | 1720 | gfs2_quota_change(ip, ap.target, nuid, ngid); |
| 1718 | gfs2_quota_change(ip, blocks, nuid, ngid); | ||
| 1719 | } | 1721 | } |
| 1720 | 1722 | ||
| 1721 | out_end_trans: | 1723 | out_end_trans: |
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 3aa17d4d1cfc..5c27e48aa76f 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c | |||
| @@ -923,6 +923,9 @@ restart: | |||
| 923 | if (error) | 923 | if (error) |
| 924 | return error; | 924 | return error; |
| 925 | 925 | ||
| 926 | if (test_and_clear_bit(QDF_REFRESH, &qd->qd_flags)) | ||
| 927 | force_refresh = FORCE; | ||
| 928 | |||
| 926 | qd->qd_qb = *(struct gfs2_quota_lvb *)qd->qd_gl->gl_lksb.sb_lvbptr; | 929 | qd->qd_qb = *(struct gfs2_quota_lvb *)qd->qd_gl->gl_lksb.sb_lvbptr; |
| 927 | 930 | ||
| 928 | if (force_refresh || qd->qd_qb.qb_magic != cpu_to_be32(GFS2_MAGIC)) { | 931 | if (force_refresh || qd->qd_qb.qb_magic != cpu_to_be32(GFS2_MAGIC)) { |
| @@ -974,11 +977,8 @@ int gfs2_quota_lock(struct gfs2_inode *ip, kuid_t uid, kgid_t gid) | |||
| 974 | sizeof(struct gfs2_quota_data *), sort_qd, NULL); | 977 | sizeof(struct gfs2_quota_data *), sort_qd, NULL); |
| 975 | 978 | ||
| 976 | for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) { | 979 | for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) { |
| 977 | int force = NO_FORCE; | ||
| 978 | qd = ip->i_res->rs_qa_qd[x]; | 980 | qd = ip->i_res->rs_qa_qd[x]; |
| 979 | if (test_and_clear_bit(QDF_REFRESH, &qd->qd_flags)) | 981 | error = do_glock(qd, NO_FORCE, &ip->i_res->rs_qa_qd_ghs[x]); |
| 980 | force = FORCE; | ||
| 981 | error = do_glock(qd, force, &ip->i_res->rs_qa_qd_ghs[x]); | ||
| 982 | if (error) | 982 | if (error) |
| 983 | break; | 983 | break; |
| 984 | } | 984 | } |
| @@ -1094,14 +1094,33 @@ static int print_message(struct gfs2_quota_data *qd, char *type) | |||
| 1094 | return 0; | 1094 | return 0; |
| 1095 | } | 1095 | } |
| 1096 | 1096 | ||
| 1097 | int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid) | 1097 | /** |
| 1098 | * gfs2_quota_check - check if allocating new blocks will exceed quota | ||
| 1099 | * @ip: The inode for which this check is being performed | ||
| 1100 | * @uid: The uid to check against | ||
| 1101 | * @gid: The gid to check against | ||
| 1102 | * @ap: The allocation parameters. ap->target contains the requested | ||
| 1103 | * blocks. ap->min_target, if set, contains the minimum blks | ||
| 1104 | * requested. | ||
| 1105 | * | ||
| 1106 | * Returns: 0 on success. | ||
| 1107 | * min_req = ap->min_target ? ap->min_target : ap->target; | ||
| 1108 | * quota must allow at least min_req blks for success and | ||
| 1109 | * ap->allowed is set to the number of blocks allowed | ||
| 1110 | * | ||
| 1111 | * -EDQUOT otherwise, quota violation. ap->allowed is set to number | ||
| 1112 | * of blocks available. | ||
| 1113 | */ | ||
| 1114 | int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid, | ||
| 1115 | struct gfs2_alloc_parms *ap) | ||
| 1098 | { | 1116 | { |
| 1099 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 1117 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
| 1100 | struct gfs2_quota_data *qd; | 1118 | struct gfs2_quota_data *qd; |
| 1101 | s64 value; | 1119 | s64 value, warn, limit; |
| 1102 | unsigned int x; | 1120 | unsigned int x; |
| 1103 | int error = 0; | 1121 | int error = 0; |
| 1104 | 1122 | ||
| 1123 | ap->allowed = UINT_MAX; /* Assume we are permitted a whole lot */ | ||
| 1105 | if (!test_bit(GIF_QD_LOCKED, &ip->i_flags)) | 1124 | if (!test_bit(GIF_QD_LOCKED, &ip->i_flags)) |
| 1106 | return 0; | 1125 | return 0; |
| 1107 | 1126 | ||
| @@ -1115,30 +1134,37 @@ int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid) | |||
| 1115 | qid_eq(qd->qd_id, make_kqid_gid(gid)))) | 1134 | qid_eq(qd->qd_id, make_kqid_gid(gid)))) |
| 1116 | continue; | 1135 | continue; |
| 1117 | 1136 | ||
| 1137 | warn = (s64)be64_to_cpu(qd->qd_qb.qb_warn); | ||
| 1138 | limit = (s64)be64_to_cpu(qd->qd_qb.qb_limit); | ||
| 1118 | value = (s64)be64_to_cpu(qd->qd_qb.qb_value); | 1139 | value = (s64)be64_to_cpu(qd->qd_qb.qb_value); |
| 1119 | spin_lock(&qd_lock); | 1140 | spin_lock(&qd_lock); |
| 1120 | value += qd->qd_change; | 1141 | value += qd->qd_change; |
| 1121 | spin_unlock(&qd_lock); | 1142 | spin_unlock(&qd_lock); |
| 1122 | 1143 | ||
| 1123 | if (be64_to_cpu(qd->qd_qb.qb_limit) && (s64)be64_to_cpu(qd->qd_qb.qb_limit) < value) { | 1144 | if (limit > 0 && (limit - value) < ap->allowed) |
| 1124 | print_message(qd, "exceeded"); | 1145 | ap->allowed = limit - value; |
| 1125 | quota_send_warning(qd->qd_id, | 1146 | /* If we can't meet the target */ |
| 1126 | sdp->sd_vfs->s_dev, QUOTA_NL_BHARDWARN); | 1147 | if (limit && limit < (value + (s64)ap->target)) { |
| 1127 | 1148 | /* If no min_target specified or we don't meet | |
| 1128 | error = -EDQUOT; | 1149 | * min_target, return -EDQUOT */ |
| 1129 | break; | 1150 | if (!ap->min_target || ap->min_target > ap->allowed) { |
| 1130 | } else if (be64_to_cpu(qd->qd_qb.qb_warn) && | 1151 | print_message(qd, "exceeded"); |
| 1131 | (s64)be64_to_cpu(qd->qd_qb.qb_warn) < value && | 1152 | quota_send_warning(qd->qd_id, |
| 1153 | sdp->sd_vfs->s_dev, | ||
| 1154 | QUOTA_NL_BHARDWARN); | ||
| 1155 | error = -EDQUOT; | ||
| 1156 | break; | ||
| 1157 | } | ||
| 1158 | } else if (warn && warn < value && | ||
| 1132 | time_after_eq(jiffies, qd->qd_last_warn + | 1159 | time_after_eq(jiffies, qd->qd_last_warn + |
| 1133 | gfs2_tune_get(sdp, | 1160 | gfs2_tune_get(sdp, gt_quota_warn_period) |
| 1134 | gt_quota_warn_period) * HZ)) { | 1161 | * HZ)) { |
| 1135 | quota_send_warning(qd->qd_id, | 1162 | quota_send_warning(qd->qd_id, |
| 1136 | sdp->sd_vfs->s_dev, QUOTA_NL_BSOFTWARN); | 1163 | sdp->sd_vfs->s_dev, QUOTA_NL_BSOFTWARN); |
| 1137 | error = print_message(qd, "warning"); | 1164 | error = print_message(qd, "warning"); |
| 1138 | qd->qd_last_warn = jiffies; | 1165 | qd->qd_last_warn = jiffies; |
| 1139 | } | 1166 | } |
| 1140 | } | 1167 | } |
| 1141 | |||
| 1142 | return error; | 1168 | return error; |
| 1143 | } | 1169 | } |
| 1144 | 1170 | ||
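
The kernel-doc added above spells out the new contract for gfs2_quota_check(): ap->allowed reports how many blocks the tightest applicable quota still permits, and the check fails with -EDQUOT only when even ap->min_target (or the full ap->target when no minimum is given) cannot be met; the soft-limit warning path is untouched. The sketch below isolates that hard-limit decision for a single quota line; the struct, the numbers and the clamping of negative headroom to zero are choices made for the example, not code lifted from the patch:

    /* Sketch of the hard-limit decision in the reworked check: work out
     * how many more blocks the limit allows, then fail only if even the
     * minimum target cannot be met.  One quota line, made-up numbers;
     * no locking, warning throttling or netlink plumbing. */
    #include <stdio.h>
    #include <stdint.h>
    #include <errno.h>
    #include <limits.h>

    struct alloc_parms {
        uint64_t target;      /* blocks the caller would like */
        uint32_t min_target;  /* smallest acceptable allocation, 0 = none */
        uint64_t allowed;     /* filled in: blocks quota still permits */
    };

    static int quota_check(int64_t limit, int64_t value,
                           struct alloc_parms *ap)
    {
        ap->allowed = UINT_MAX;          /* assume unconstrained */

        if (limit > 0) {
            int64_t room = limit - value;

            if (room < 0)
                room = 0;                /* already over the limit */
            if ((uint64_t)room < ap->allowed)
                ap->allowed = room;
        }

        /* Can the full target not be met? */
        if (limit && limit < value + (int64_t)ap->target) {
            /* Still fine if a minimum was given and it fits. */
            if (!ap->min_target || ap->min_target > ap->allowed)
                return -EDQUOT;
        }
        return 0;
    }

    int main(void)
    {
        struct alloc_parms ap = { .target = 500, .min_target = 64 };
        int64_t limit = 1000, value = 900;   /* 100 blocks of headroom */
        int err = quota_check(limit, value, &ap);

        /* target 500 does not fit, but min_target 64 <= allowed 100,
         * so the caller may proceed with a smaller allocation. */
        printf("err=%d allowed=%llu\n", err,
               (unsigned long long)ap.allowed);
        return 0;
    }
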
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h index 55d506eb3c4a..ad04b3acae2b 100644 --- a/fs/gfs2/quota.h +++ b/fs/gfs2/quota.h | |||
| @@ -24,7 +24,8 @@ extern void gfs2_quota_unhold(struct gfs2_inode *ip); | |||
| 24 | extern int gfs2_quota_lock(struct gfs2_inode *ip, kuid_t uid, kgid_t gid); | 24 | extern int gfs2_quota_lock(struct gfs2_inode *ip, kuid_t uid, kgid_t gid); |
| 25 | extern void gfs2_quota_unlock(struct gfs2_inode *ip); | 25 | extern void gfs2_quota_unlock(struct gfs2_inode *ip); |
| 26 | 26 | ||
| 27 | extern int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid); | 27 | extern int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid, |
| 28 | struct gfs2_alloc_parms *ap); | ||
| 28 | extern void gfs2_quota_change(struct gfs2_inode *ip, s64 change, | 29 | extern void gfs2_quota_change(struct gfs2_inode *ip, s64 change, |
| 29 | kuid_t uid, kgid_t gid); | 30 | kuid_t uid, kgid_t gid); |
| 30 | 31 | ||
| @@ -37,7 +38,8 @@ extern int gfs2_quotad(void *data); | |||
| 37 | 38 | ||
| 38 | extern void gfs2_wake_up_statfs(struct gfs2_sbd *sdp); | 39 | extern void gfs2_wake_up_statfs(struct gfs2_sbd *sdp); |
| 39 | 40 | ||
| 40 | static inline int gfs2_quota_lock_check(struct gfs2_inode *ip) | 41 | static inline int gfs2_quota_lock_check(struct gfs2_inode *ip, |
| 42 | struct gfs2_alloc_parms *ap) | ||
| 41 | { | 43 | { |
| 42 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 44 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
| 43 | int ret; | 45 | int ret; |
| @@ -48,7 +50,7 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip) | |||
| 48 | return ret; | 50 | return ret; |
| 49 | if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON) | 51 | if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON) |
| 50 | return 0; | 52 | return 0; |
| 51 | ret = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid); | 53 | ret = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid, ap); |
| 52 | if (ret) | 54 | if (ret) |
| 53 | gfs2_quota_unlock(ip); | 55 | gfs2_quota_unlock(ip); |
| 54 | return ret; | 56 | return ret; |
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 9150207f365c..6af2396a317c 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c | |||
| @@ -1946,10 +1946,18 @@ static inline int fast_to_acquire(struct gfs2_rgrpd *rgd) | |||
| 1946 | * @ip: the inode to reserve space for | 1946 | * @ip: the inode to reserve space for |
| 1947 | * @ap: the allocation parameters | 1947 | * @ap: the allocation parameters |
| 1948 | * | 1948 | * |
| 1949 | * Returns: errno | 1949 | * We try our best to find an rgrp that has at least ap->target blocks |
| 1950 | * available. After a couple of passes (loops == 2), the prospects of finding | ||
| 1951 | * such an rgrp diminish. At this stage, we return the first rgrp that has | ||
| 1952 | at least ap->min_target blocks available. Either way, we set ap->allowed to | ||
| 1953 | * the number of blocks available in the chosen rgrp. | ||
| 1954 | * | ||
| 1955 | * Returns: 0 on success, | ||
| 1956 | * -ENOMEM if a suitable rgrp can't be found | ||
| 1957 | * errno otherwise | ||
| 1950 | */ | 1958 | */ |
| 1951 | 1959 | ||
| 1952 | int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *ap) | 1960 | int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) |
| 1953 | { | 1961 | { |
| 1954 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 1962 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
| 1955 | struct gfs2_rgrpd *begin = NULL; | 1963 | struct gfs2_rgrpd *begin = NULL; |
| @@ -2012,7 +2020,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *a | |||
| 2012 | /* Skip unusable resource groups */ | 2020 | /* Skip unusable resource groups */ |
| 2013 | if ((rs->rs_rbm.rgd->rd_flags & (GFS2_RGF_NOALLOC | | 2021 | if ((rs->rs_rbm.rgd->rd_flags & (GFS2_RGF_NOALLOC | |
| 2014 | GFS2_RDF_ERROR)) || | 2022 | GFS2_RDF_ERROR)) || |
| 2015 | (ap->target > rs->rs_rbm.rgd->rd_extfail_pt)) | 2023 | (loops == 0 && ap->target > rs->rs_rbm.rgd->rd_extfail_pt)) |
| 2016 | goto skip_rgrp; | 2024 | goto skip_rgrp; |
| 2017 | 2025 | ||
| 2018 | if (sdp->sd_args.ar_rgrplvb) | 2026 | if (sdp->sd_args.ar_rgrplvb) |
| @@ -2027,11 +2035,13 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *a | |||
| 2027 | goto check_rgrp; | 2035 | goto check_rgrp; |
| 2028 | 2036 | ||
| 2029 | /* If rgrp has enough free space, use it */ | 2037 | /* If rgrp has enough free space, use it */ |
| 2030 | if (rs->rs_rbm.rgd->rd_free_clone >= ap->target) { | 2038 | if (rs->rs_rbm.rgd->rd_free_clone >= ap->target || |
| 2039 | (loops == 2 && ap->min_target && | ||
| 2040 | rs->rs_rbm.rgd->rd_free_clone >= ap->min_target)) { | ||
| 2031 | ip->i_rgd = rs->rs_rbm.rgd; | 2041 | ip->i_rgd = rs->rs_rbm.rgd; |
| 2042 | ap->allowed = ip->i_rgd->rd_free_clone; | ||
| 2032 | return 0; | 2043 | return 0; |
| 2033 | } | 2044 | } |
| 2034 | |||
| 2035 | check_rgrp: | 2045 | check_rgrp: |
| 2036 | /* Check for unlinked inodes which can be reclaimed */ | 2046 | /* Check for unlinked inodes which can be reclaimed */ |
| 2037 | if (rs->rs_rbm.rgd->rd_flags & GFS2_RDF_CHECK) | 2047 | if (rs->rs_rbm.rgd->rd_flags & GFS2_RDF_CHECK) |
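
The comment block added to gfs2_inplace_reserve() describes the new selection policy: an rgrp is skipped for being under ap->target only on the first pass, and from loops == 2 onward a group that can cover just ap->min_target is accepted, with ap->allowed recording what the winner really has free. A stripped-down selection loop over an in-memory array, shown below, captures that policy in isolation; the rgrp array, pass count and block figures are invented:

    /* Stand-alone model of the selection policy: prefer a resource group
     * with at least `target` free blocks, but on the third pass settle
     * for one that covers `min_target`, and report what the winner can
     * actually supply.  All groups and figures are made up. */
    #include <stdio.h>
    #include <errno.h>

    struct rgrp {
        const char  *name;
        unsigned int free_clone;   /* free blocks usable right now */
    };

    static int reserve(struct rgrp *rgd, int n, unsigned int target,
                       unsigned int min_target, unsigned int *allowed,
                       int *picked)
    {
        for (int loops = 0; loops <= 3; loops++) {
            for (int i = 0; i < n; i++) {
                struct rgrp *r = &rgd[i];

                if (r->free_clone >= target ||
                    (loops == 2 && min_target &&
                     r->free_clone >= min_target)) {
                    *allowed = r->free_clone;
                    *picked = i;
                    return 0;
                }
            }
        }
        return -ENOSPC;   /* nothing met even the minimum */
    }

    int main(void)
    {
        struct rgrp rgd[] = {
            { "rg0", 120 }, { "rg1", 300 }, { "rg2", 80 },
        };
        unsigned int allowed = 0;
        int picked = -1;

        /* Nobody has 1000 free blocks, but rg0 covers min_target 100. */
        int err = reserve(rgd, 3, 1000, 100, &allowed, &picked);

        printf("err=%d picked=%s allowed=%u\n", err,
               err ? "-" : rgd[picked].name, allowed);
        return 0;
    }
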
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index b104f4af3afd..68972ecfbb01 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h | |||
| @@ -41,7 +41,8 @@ extern void gfs2_rgrp_go_unlock(struct gfs2_holder *gh); | |||
| 41 | extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip); | 41 | extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip); |
| 42 | 42 | ||
| 43 | #define GFS2_AF_ORLOV 1 | 43 | #define GFS2_AF_ORLOV 1 |
| 44 | extern int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *ap); | 44 | extern int gfs2_inplace_reserve(struct gfs2_inode *ip, |
| 45 | struct gfs2_alloc_parms *ap); | ||
| 45 | extern void gfs2_inplace_release(struct gfs2_inode *ip); | 46 | extern void gfs2_inplace_release(struct gfs2_inode *ip); |
| 46 | 47 | ||
| 47 | extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n, | 48 | extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n, |
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c index 0b81f783f787..fd260ce8869a 100644 --- a/fs/gfs2/xattr.c +++ b/fs/gfs2/xattr.c | |||
| @@ -732,7 +732,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er, | |||
| 732 | if (error) | 732 | if (error) |
| 733 | return error; | 733 | return error; |
| 734 | 734 | ||
| 735 | error = gfs2_quota_lock_check(ip); | 735 | error = gfs2_quota_lock_check(ip, &ap); |
| 736 | if (error) | 736 | if (error) |
| 737 | return error; | 737 | return error; |
| 738 | 738 | ||
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index d0929bc81782..98d4ea45bb70 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c | |||
| @@ -14,7 +14,7 @@ | |||
| 14 | #include <linux/pagemap.h> | 14 | #include <linux/pagemap.h> |
| 15 | #include <linux/mpage.h> | 15 | #include <linux/mpage.h> |
| 16 | #include <linux/sched.h> | 16 | #include <linux/sched.h> |
| 17 | #include <linux/aio.h> | 17 | #include <linux/uio.h> |
| 18 | 18 | ||
| 19 | #include "hfs_fs.h" | 19 | #include "hfs_fs.h" |
| 20 | #include "btree.h" | 20 | #include "btree.h" |
diff --git a/fs/hfsplus/brec.c b/fs/hfsplus/brec.c index 6e560d56094b..754fdf8c6356 100644 --- a/fs/hfsplus/brec.c +++ b/fs/hfsplus/brec.c | |||
| @@ -131,13 +131,16 @@ skip: | |||
| 131 | hfs_bnode_write(node, entry, data_off + key_len, entry_len); | 131 | hfs_bnode_write(node, entry, data_off + key_len, entry_len); |
| 132 | hfs_bnode_dump(node); | 132 | hfs_bnode_dump(node); |
| 133 | 133 | ||
| 134 | if (new_node) { | 134 | /* |
| 135 | /* update parent key if we inserted a key | 135 | * update parent key if we inserted a key |
| 136 | * at the start of the first node | 136 | * at the start of the node and it is not the new node |
| 137 | */ | 137 | */ |
| 138 | if (!rec && new_node != node) | 138 | if (!rec && new_node != node) { |
| 139 | hfs_brec_update_parent(fd); | 139 | hfs_bnode_read_key(node, fd->search_key, data_off + size); |
| 140 | hfs_brec_update_parent(fd); | ||
| 141 | } | ||
| 140 | 142 | ||
| 143 | if (new_node) { | ||
| 141 | hfs_bnode_put(fd->bnode); | 144 | hfs_bnode_put(fd->bnode); |
| 142 | if (!new_node->parent) { | 145 | if (!new_node->parent) { |
| 143 | hfs_btree_inc_height(tree); | 146 | hfs_btree_inc_height(tree); |
| @@ -168,9 +171,6 @@ skip: | |||
| 168 | goto again; | 171 | goto again; |
| 169 | } | 172 | } |
| 170 | 173 | ||
| 171 | if (!rec) | ||
| 172 | hfs_brec_update_parent(fd); | ||
| 173 | |||
| 174 | return 0; | 174 | return 0; |
| 175 | } | 175 | } |
| 176 | 176 | ||
| @@ -370,6 +370,8 @@ again: | |||
| 370 | if (IS_ERR(parent)) | 370 | if (IS_ERR(parent)) |
| 371 | return PTR_ERR(parent); | 371 | return PTR_ERR(parent); |
| 372 | __hfs_brec_find(parent, fd, hfs_find_rec_by_key); | 372 | __hfs_brec_find(parent, fd, hfs_find_rec_by_key); |
| 373 | if (fd->record < 0) | ||
| 374 | return -ENOENT; | ||
| 373 | hfs_bnode_dump(parent); | 375 | hfs_bnode_dump(parent); |
| 374 | rec = fd->record; | 376 | rec = fd->record; |
| 375 | 377 | ||
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index 0cf786f2d046..f541196d4ee9 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c | |||
| @@ -14,7 +14,7 @@ | |||
| 14 | #include <linux/pagemap.h> | 14 | #include <linux/pagemap.h> |
| 15 | #include <linux/mpage.h> | 15 | #include <linux/mpage.h> |
| 16 | #include <linux/sched.h> | 16 | #include <linux/sched.h> |
| 17 | #include <linux/aio.h> | 17 | #include <linux/uio.h> |
| 18 | 18 | ||
| 19 | #include "hfsplus_fs.h" | 19 | #include "hfsplus_fs.h" |
| 20 | #include "hfsplus_raw.h" | 20 | #include "hfsplus_raw.h" |
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index c274aca8e8dc..db76cec3ce21 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c | |||
| @@ -319,7 +319,7 @@ static int hugetlbfs_write_end(struct file *file, struct address_space *mapping, | |||
| 319 | 319 | ||
| 320 | static void truncate_huge_page(struct page *page) | 320 | static void truncate_huge_page(struct page *page) |
| 321 | { | 321 | { |
| 322 | cancel_dirty_page(page, /* No IO accounting for huge pages? */0); | 322 | ClearPageDirty(page); |
| 323 | ClearPageUptodate(page); | 323 | ClearPageUptodate(page); |
| 324 | delete_from_page_cache(page); | 324 | delete_from_page_cache(page); |
| 325 | } | 325 | } |
diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c index d72817ac51f6..762c7a3cf43d 100644 --- a/fs/jffs2/xattr.c +++ b/fs/jffs2/xattr.c | |||
| @@ -195,7 +195,7 @@ static int do_verify_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_dat | |||
| 195 | /* unchecked xdatum is chained with c->xattr_unchecked */ | 195 | /* unchecked xdatum is chained with c->xattr_unchecked */ |
| 196 | list_del_init(&xd->xindex); | 196 | list_del_init(&xd->xindex); |
| 197 | 197 | ||
| 198 | dbg_xattr("success on verfying xdatum (xid=%u, version=%u)\n", | 198 | dbg_xattr("success on verifying xdatum (xid=%u, version=%u)\n", |
| 199 | xd->xid, xd->version); | 199 | xd->xid, xd->version); |
| 200 | 200 | ||
| 201 | return 0; | 201 | return 0; |
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index bd3df1ca3c9b..3197aed10614 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c | |||
| @@ -22,8 +22,8 @@ | |||
| 22 | #include <linux/buffer_head.h> | 22 | #include <linux/buffer_head.h> |
| 23 | #include <linux/pagemap.h> | 23 | #include <linux/pagemap.h> |
| 24 | #include <linux/quotaops.h> | 24 | #include <linux/quotaops.h> |
| 25 | #include <linux/uio.h> | ||
| 25 | #include <linux/writeback.h> | 26 | #include <linux/writeback.h> |
| 26 | #include <linux/aio.h> | ||
| 27 | #include "jfs_incore.h" | 27 | #include "jfs_incore.h" |
| 28 | #include "jfs_inode.h" | 28 | #include "jfs_inode.h" |
| 29 | #include "jfs_filsys.h" | 29 | #include "jfs_filsys.h" |
diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 5d30c56ae075..4cd9798f4948 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c | |||
| @@ -102,7 +102,7 @@ void jfs_error(struct super_block *sb, const char *fmt, ...) | |||
| 102 | vaf.fmt = fmt; | 102 | vaf.fmt = fmt; |
| 103 | vaf.va = &args; | 103 | vaf.va = &args; |
| 104 | 104 | ||
| 105 | pr_err("ERROR: (device %s): %pf: %pV\n", | 105 | pr_err("ERROR: (device %s): %ps: %pV\n", |
| 106 | sb->s_id, __builtin_return_address(0), &vaf); | 106 | sb->s_id, __builtin_return_address(0), &vaf); |
| 107 | 107 | ||
| 108 | va_end(args); | 108 | va_end(args); |
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c index b684e8a132e6..2bacb9988566 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c | |||
| @@ -207,6 +207,7 @@ static ssize_t kernfs_file_direct_read(struct kernfs_open_file *of, | |||
| 207 | goto out_free; | 207 | goto out_free; |
| 208 | } | 208 | } |
| 209 | 209 | ||
| 210 | of->event = atomic_read(&of->kn->attr.open->event); | ||
| 210 | ops = kernfs_ops(of->kn); | 211 | ops = kernfs_ops(of->kn); |
| 211 | if (ops->read) | 212 | if (ops->read) |
| 212 | len = ops->read(of, buf, len, *ppos); | 213 | len = ops->read(of, buf, len, *ppos); |
diff --git a/fs/locks.c b/fs/locks.c index 528fedfda15e..40bc384728c0 100644 --- a/fs/locks.c +++ b/fs/locks.c | |||
| @@ -1388,9 +1388,8 @@ any_leases_conflict(struct inode *inode, struct file_lock *breaker) | |||
| 1388 | int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) | 1388 | int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) |
| 1389 | { | 1389 | { |
| 1390 | int error = 0; | 1390 | int error = 0; |
| 1391 | struct file_lock *new_fl; | ||
| 1392 | struct file_lock_context *ctx = inode->i_flctx; | 1391 | struct file_lock_context *ctx = inode->i_flctx; |
| 1393 | struct file_lock *fl; | 1392 | struct file_lock *new_fl, *fl, *tmp; |
| 1394 | unsigned long break_time; | 1393 | unsigned long break_time; |
| 1395 | int want_write = (mode & O_ACCMODE) != O_RDONLY; | 1394 | int want_write = (mode & O_ACCMODE) != O_RDONLY; |
| 1396 | LIST_HEAD(dispose); | 1395 | LIST_HEAD(dispose); |
| @@ -1420,7 +1419,7 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) | |||
| 1420 | break_time++; /* so that 0 means no break time */ | 1419 | break_time++; /* so that 0 means no break time */ |
| 1421 | } | 1420 | } |
| 1422 | 1421 | ||
| 1423 | list_for_each_entry(fl, &ctx->flc_lease, fl_list) { | 1422 | list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, fl_list) { |
| 1424 | if (!leases_conflict(fl, new_fl)) | 1423 | if (!leases_conflict(fl, new_fl)) |
| 1425 | continue; | 1424 | continue; |
| 1426 | if (want_write) { | 1425 | if (want_write) { |
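
__break_lease() switches to list_for_each_entry_safe() above because a conflicting lease can be unlinked from ctx->flc_lease (and moved to the dispose list) while the walk is still in progress, so the next pointer has to be cached before the loop body runs. The example below shows the same idiom on an ordinary doubly linked list; the list helpers and struct lease are local reimplementations for the demo, not <linux/list.h> or the real file_lock:

    /* Why the _safe iterator: deleting the current node while walking a
     * list requires caching the next node first. */
    #include <stdio.h>
    #include <stdlib.h>
    #include <stddef.h>

    struct list_head { struct list_head *next, *prev; };

    #define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

    static void list_init(struct list_head *h) { h->next = h->prev = h; }

    static void list_add_tail(struct list_head *n, struct list_head *h)
    {
        n->prev = h->prev;
        n->next = h;
        h->prev->next = n;
        h->prev = n;
    }

    static void list_del(struct list_head *n)
    {
        n->prev->next = n->next;
        n->next->prev = n->prev;
        n->next = n->prev = NULL;
    }

    struct lease {
        int              id;
        int              conflicts;   /* would this lease be broken? */
        struct list_head node;
    };

    int main(void)
    {
        struct list_head leases, *pos, *tmp;

        list_init(&leases);
        for (int i = 0; i < 4; i++) {
            struct lease *l = malloc(sizeof(*l));

            if (!l)
                return 1;
            l->id = i;
            l->conflicts = (i % 2 == 0);   /* break the even ones */
            list_add_tail(&l->node, &leases);
        }

        /* The "safe" pattern: save tmp = pos->next before the body may
         * unlink pos; this is what list_for_each_entry_safe() expands to. */
        for (pos = leases.next, tmp = pos->next; pos != &leases;
             pos = tmp, tmp = pos->next) {
            struct lease *l = container_of(pos, struct lease, node);

            if (!l->conflicts)
                continue;
            printf("breaking lease %d\n", l->id);
            list_del(&l->node);            /* safe: tmp already saved */
            free(l);
        }

        for (pos = leases.next; pos != &leases; pos = pos->next)
            printf("lease %d survives\n",
                   container_of(pos, struct lease, node)->id);
        return 0;
    }
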
diff --git a/fs/namei.c b/fs/namei.c index c83145af4bfc..76fb76a0818b 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
| @@ -119,15 +119,14 @@ | |||
| 119 | * PATH_MAX includes the nul terminator --RR. | 119 | * PATH_MAX includes the nul terminator --RR. |
| 120 | */ | 120 | */ |
| 121 | 121 | ||
| 122 | #define EMBEDDED_NAME_MAX (PATH_MAX - sizeof(struct filename)) | 122 | #define EMBEDDED_NAME_MAX (PATH_MAX - offsetof(struct filename, iname)) |
| 123 | 123 | ||
| 124 | struct filename * | 124 | struct filename * |
| 125 | getname_flags(const char __user *filename, int flags, int *empty) | 125 | getname_flags(const char __user *filename, int flags, int *empty) |
| 126 | { | 126 | { |
| 127 | struct filename *result, *err; | 127 | struct filename *result; |
| 128 | int len; | ||
| 129 | long max; | ||
| 130 | char *kname; | 128 | char *kname; |
| 129 | int len; | ||
| 131 | 130 | ||
| 132 | result = audit_reusename(filename); | 131 | result = audit_reusename(filename); |
| 133 | if (result) | 132 | if (result) |
| @@ -136,22 +135,18 @@ getname_flags(const char __user *filename, int flags, int *empty) | |||
| 136 | result = __getname(); | 135 | result = __getname(); |
| 137 | if (unlikely(!result)) | 136 | if (unlikely(!result)) |
| 138 | return ERR_PTR(-ENOMEM); | 137 | return ERR_PTR(-ENOMEM); |
| 139 | result->refcnt = 1; | ||
| 140 | 138 | ||
| 141 | /* | 139 | /* |
| 142 | * First, try to embed the struct filename inside the names_cache | 140 | * First, try to embed the struct filename inside the names_cache |
| 143 | * allocation | 141 | * allocation |
| 144 | */ | 142 | */ |
| 145 | kname = (char *)result + sizeof(*result); | 143 | kname = (char *)result->iname; |
| 146 | result->name = kname; | 144 | result->name = kname; |
| 147 | result->separate = false; | ||
| 148 | max = EMBEDDED_NAME_MAX; | ||
| 149 | 145 | ||
| 150 | recopy: | 146 | len = strncpy_from_user(kname, filename, EMBEDDED_NAME_MAX); |
| 151 | len = strncpy_from_user(kname, filename, max); | ||
| 152 | if (unlikely(len < 0)) { | 147 | if (unlikely(len < 0)) { |
| 153 | err = ERR_PTR(len); | 148 | __putname(result); |
| 154 | goto error; | 149 | return ERR_PTR(len); |
| 155 | } | 150 | } |
| 156 | 151 | ||
| 157 | /* | 152 | /* |
| @@ -160,43 +155,49 @@ recopy: | |||
| 160 | * names_cache allocation for the pathname, and re-do the copy from | 155 | * names_cache allocation for the pathname, and re-do the copy from |
| 161 | * userland. | 156 | * userland. |
| 162 | */ | 157 | */ |
| 163 | if (len == EMBEDDED_NAME_MAX && max == EMBEDDED_NAME_MAX) { | 158 | if (unlikely(len == EMBEDDED_NAME_MAX)) { |
| 159 | const size_t size = offsetof(struct filename, iname[1]); | ||
| 164 | kname = (char *)result; | 160 | kname = (char *)result; |
| 165 | 161 | ||
| 166 | result = kzalloc(sizeof(*result), GFP_KERNEL); | 162 | /* |
| 167 | if (!result) { | 163 | * size is chosen that way we to guarantee that |
| 168 | err = ERR_PTR(-ENOMEM); | 164 | * result->iname[0] is within the same object and that |
| 169 | result = (struct filename *)kname; | 165 | * kname can't be equal to result->iname, no matter what. |
| 170 | goto error; | 166 | */ |
| 167 | result = kzalloc(size, GFP_KERNEL); | ||
| 168 | if (unlikely(!result)) { | ||
| 169 | __putname(kname); | ||
| 170 | return ERR_PTR(-ENOMEM); | ||
| 171 | } | 171 | } |
| 172 | result->name = kname; | 172 | result->name = kname; |
| 173 | result->separate = true; | 173 | len = strncpy_from_user(kname, filename, PATH_MAX); |
| 174 | result->refcnt = 1; | 174 | if (unlikely(len < 0)) { |
| 175 | max = PATH_MAX; | 175 | __putname(kname); |
| 176 | goto recopy; | 176 | kfree(result); |
| 177 | return ERR_PTR(len); | ||
| 178 | } | ||
| 179 | if (unlikely(len == PATH_MAX)) { | ||
| 180 | __putname(kname); | ||
| 181 | kfree(result); | ||
| 182 | return ERR_PTR(-ENAMETOOLONG); | ||
| 183 | } | ||
| 177 | } | 184 | } |
| 178 | 185 | ||
| 186 | result->refcnt = 1; | ||
| 179 | /* The empty path is special. */ | 187 | /* The empty path is special. */ |
| 180 | if (unlikely(!len)) { | 188 | if (unlikely(!len)) { |
| 181 | if (empty) | 189 | if (empty) |
| 182 | *empty = 1; | 190 | *empty = 1; |
| 183 | err = ERR_PTR(-ENOENT); | 191 | if (!(flags & LOOKUP_EMPTY)) { |
| 184 | if (!(flags & LOOKUP_EMPTY)) | 192 | putname(result); |
| 185 | goto error; | 193 | return ERR_PTR(-ENOENT); |
| 194 | } | ||
| 186 | } | 195 | } |
| 187 | 196 | ||
| 188 | err = ERR_PTR(-ENAMETOOLONG); | ||
| 189 | if (unlikely(len >= PATH_MAX)) | ||
| 190 | goto error; | ||
| 191 | |||
| 192 | result->uptr = filename; | 197 | result->uptr = filename; |
| 193 | result->aname = NULL; | 198 | result->aname = NULL; |
| 194 | audit_getname(result); | 199 | audit_getname(result); |
| 195 | return result; | 200 | return result; |
| 196 | |||
| 197 | error: | ||
| 198 | putname(result); | ||
| 199 | return err; | ||
| 200 | } | 201 | } |
| 201 | 202 | ||
| 202 | struct filename * | 203 | struct filename * |
| @@ -216,8 +217,7 @@ getname_kernel(const char * filename) | |||
| 216 | return ERR_PTR(-ENOMEM); | 217 | return ERR_PTR(-ENOMEM); |
| 217 | 218 | ||
| 218 | if (len <= EMBEDDED_NAME_MAX) { | 219 | if (len <= EMBEDDED_NAME_MAX) { |
| 219 | result->name = (char *)(result) + sizeof(*result); | 220 | result->name = (char *)result->iname; |
| 220 | result->separate = false; | ||
| 221 | } else if (len <= PATH_MAX) { | 221 | } else if (len <= PATH_MAX) { |
| 222 | struct filename *tmp; | 222 | struct filename *tmp; |
| 223 | 223 | ||
| @@ -227,7 +227,6 @@ getname_kernel(const char * filename) | |||
| 227 | return ERR_PTR(-ENOMEM); | 227 | return ERR_PTR(-ENOMEM); |
| 228 | } | 228 | } |
| 229 | tmp->name = (char *)result; | 229 | tmp->name = (char *)result; |
| 230 | tmp->separate = true; | ||
| 231 | result = tmp; | 230 | result = tmp; |
| 232 | } else { | 231 | } else { |
| 233 | __putname(result); | 232 | __putname(result); |
| @@ -249,7 +248,7 @@ void putname(struct filename *name) | |||
| 249 | if (--name->refcnt > 0) | 248 | if (--name->refcnt > 0) |
| 250 | return; | 249 | return; |
| 251 | 250 | ||
| 252 | if (name->separate) { | 251 | if (name->name != name->iname) { |
| 253 | __putname(name->name); | 252 | __putname(name->name); |
| 254 | kfree(name); | 253 | kfree(name); |
| 255 | } else | 254 | } else |
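
The getname_flags()/getname_kernel()/putname() hunks above drop the ->separate flag: short names are stored in the flexible iname[] tail of the same names_cache allocation, long names fall back to a second buffer sized so that iname[0] still exists, and putname() distinguishes the two simply by comparing name against iname. The self-contained sketch below reproduces that storage pattern; the struct layout, the size limits and the use of plain malloc()/strdup() are stand-ins for the example, not the real struct filename:

    /* Sketch of the "embedded or separate" name storage that replaces the
     * ->separate flag: short names live in the flexible iname[] tail of
     * the same allocation, long names get their own buffer, and freeing
     * just compares the two pointers. */
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    #define EMBEDDED_NAME_MAX 32     /* deliberately tiny for the demo */
    #define NAME_MAX_LEN      4096   /* stand-in for PATH_MAX */

    struct filename {
        const char *name;            /* points at iname[] or a strdup() */
        int         refcnt;
        char        iname[];         /* flexible array member */
    };

    static struct filename *getname(const char *user)
    {
        size_t len = strlen(user) + 1;
        struct filename *result;

        if (len <= EMBEDDED_NAME_MAX) {
            /* One allocation holds the struct and the name. */
            result = malloc(sizeof(*result) + len);
            if (!result)
                return NULL;
            memcpy(result->iname, user, len);
            result->name = result->iname;
        } else if (len <= NAME_MAX_LEN) {
            /* Allocate the struct with room for iname[0] so the pointer
             * comparison in putname() stays meaningful. */
            result = malloc(sizeof(*result) + 1);
            if (!result)
                return NULL;
            result->name = strdup(user);
            if (!result->name) {
                free(result);
                return NULL;
            }
        } else {
            return NULL;             /* would be -ENAMETOOLONG */
        }
        result->refcnt = 1;
        return result;
    }

    static void putname(struct filename *name)
    {
        if (--name->refcnt > 0)
            return;
        /* The pointer comparison replaces the old ->separate flag. */
        if (name->name != name->iname)
            free((char *)name->name);
        free(name);
    }

    int main(void)
    {
        struct filename *a = getname("short");
        struct filename *b = getname("a much longer path name that cannot "
                                     "possibly fit in the embedded buffer");

        if (!a || !b)
            return 1;
        printf("a embedded: %s\n", a->name == a->iname ? "yes" : "no");
        printf("b embedded: %s\n", b->name == b->iname ? "yes" : "no");
        putname(a);
        putname(b);
        return 0;
    }
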
| @@ -1851,10 +1850,11 @@ static int link_path_walk(const char *name, struct nameidata *nd) | |||
| 1851 | return err; | 1850 | return err; |
| 1852 | } | 1851 | } |
| 1853 | 1852 | ||
| 1854 | static int path_init(int dfd, const char *name, unsigned int flags, | 1853 | static int path_init(int dfd, const struct filename *name, unsigned int flags, |
| 1855 | struct nameidata *nd) | 1854 | struct nameidata *nd) |
| 1856 | { | 1855 | { |
| 1857 | int retval = 0; | 1856 | int retval = 0; |
| 1857 | const char *s = name->name; | ||
| 1858 | 1858 | ||
| 1859 | nd->last_type = LAST_ROOT; /* if there are only slashes... */ | 1859 | nd->last_type = LAST_ROOT; /* if there are only slashes... */ |
| 1860 | nd->flags = flags | LOOKUP_JUMPED | LOOKUP_PARENT; | 1860 | nd->flags = flags | LOOKUP_JUMPED | LOOKUP_PARENT; |
| @@ -1863,7 +1863,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, | |||
| 1863 | if (flags & LOOKUP_ROOT) { | 1863 | if (flags & LOOKUP_ROOT) { |
| 1864 | struct dentry *root = nd->root.dentry; | 1864 | struct dentry *root = nd->root.dentry; |
| 1865 | struct inode *inode = root->d_inode; | 1865 | struct inode *inode = root->d_inode; |
| 1866 | if (*name) { | 1866 | if (*s) { |
| 1867 | if (!d_can_lookup(root)) | 1867 | if (!d_can_lookup(root)) |
| 1868 | return -ENOTDIR; | 1868 | return -ENOTDIR; |
| 1869 | retval = inode_permission(inode, MAY_EXEC); | 1869 | retval = inode_permission(inode, MAY_EXEC); |
| @@ -1885,7 +1885,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, | |||
| 1885 | nd->root.mnt = NULL; | 1885 | nd->root.mnt = NULL; |
| 1886 | 1886 | ||
| 1887 | nd->m_seq = read_seqbegin(&mount_lock); | 1887 | nd->m_seq = read_seqbegin(&mount_lock); |
| 1888 | if (*name=='/') { | 1888 | if (*s == '/') { |
| 1889 | if (flags & LOOKUP_RCU) { | 1889 | if (flags & LOOKUP_RCU) { |
| 1890 | rcu_read_lock(); | 1890 | rcu_read_lock(); |
| 1891 | nd->seq = set_root_rcu(nd); | 1891 | nd->seq = set_root_rcu(nd); |
| @@ -1919,7 +1919,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, | |||
| 1919 | 1919 | ||
| 1920 | dentry = f.file->f_path.dentry; | 1920 | dentry = f.file->f_path.dentry; |
| 1921 | 1921 | ||
| 1922 | if (*name) { | 1922 | if (*s) { |
| 1923 | if (!d_can_lookup(dentry)) { | 1923 | if (!d_can_lookup(dentry)) { |
| 1924 | fdput(f); | 1924 | fdput(f); |
| 1925 | return -ENOTDIR; | 1925 | return -ENOTDIR; |
| @@ -1949,7 +1949,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, | |||
| 1949 | return -ECHILD; | 1949 | return -ECHILD; |
| 1950 | done: | 1950 | done: |
| 1951 | current->total_link_count = 0; | 1951 | current->total_link_count = 0; |
| 1952 | return link_path_walk(name, nd); | 1952 | return link_path_walk(s, nd); |
| 1953 | } | 1953 | } |
| 1954 | 1954 | ||
| 1955 | static void path_cleanup(struct nameidata *nd) | 1955 | static void path_cleanup(struct nameidata *nd) |
| @@ -1972,7 +1972,7 @@ static inline int lookup_last(struct nameidata *nd, struct path *path) | |||
| 1972 | } | 1972 | } |
| 1973 | 1973 | ||
| 1974 | /* Returns 0 and nd will be valid on success; returns error otherwise. */ | 1974 | /* Returns 0 and nd will be valid on success; returns error otherwise. */ |
| 1975 | static int path_lookupat(int dfd, const char *name, | 1975 | static int path_lookupat(int dfd, const struct filename *name, |
| 1976 | unsigned int flags, struct nameidata *nd) | 1976 | unsigned int flags, struct nameidata *nd) |
| 1977 | { | 1977 | { |
| 1978 | struct path path; | 1978 | struct path path; |
| @@ -2027,31 +2027,17 @@ static int path_lookupat(int dfd, const char *name, | |||
| 2027 | static int filename_lookup(int dfd, struct filename *name, | 2027 | static int filename_lookup(int dfd, struct filename *name, |
| 2028 | unsigned int flags, struct nameidata *nd) | 2028 | unsigned int flags, struct nameidata *nd) |
| 2029 | { | 2029 | { |
| 2030 | int retval = path_lookupat(dfd, name->name, flags | LOOKUP_RCU, nd); | 2030 | int retval = path_lookupat(dfd, name, flags | LOOKUP_RCU, nd); |
| 2031 | if (unlikely(retval == -ECHILD)) | 2031 | if (unlikely(retval == -ECHILD)) |
| 2032 | retval = path_lookupat(dfd, name->name, flags, nd); | 2032 | retval = path_lookupat(dfd, name, flags, nd); |
| 2033 | if (unlikely(retval == -ESTALE)) | 2033 | if (unlikely(retval == -ESTALE)) |
| 2034 | retval = path_lookupat(dfd, name->name, | 2034 | retval = path_lookupat(dfd, name, flags | LOOKUP_REVAL, nd); |
| 2035 | flags | LOOKUP_REVAL, nd); | ||
| 2036 | 2035 | ||
| 2037 | if (likely(!retval)) | 2036 | if (likely(!retval)) |
| 2038 | audit_inode(name, nd->path.dentry, flags & LOOKUP_PARENT); | 2037 | audit_inode(name, nd->path.dentry, flags & LOOKUP_PARENT); |
| 2039 | return retval; | 2038 | return retval; |
| 2040 | } | 2039 | } |
| 2041 | 2040 | ||
| 2042 | static int do_path_lookup(int dfd, const char *name, | ||
| 2043 | unsigned int flags, struct nameidata *nd) | ||
| 2044 | { | ||
| 2045 | struct filename *filename = getname_kernel(name); | ||
| 2046 | int retval = PTR_ERR(filename); | ||
| 2047 | |||
| 2048 | if (!IS_ERR(filename)) { | ||
| 2049 | retval = filename_lookup(dfd, filename, flags, nd); | ||
| 2050 | putname(filename); | ||
| 2051 | } | ||
| 2052 | return retval; | ||
| 2053 | } | ||
| 2054 | |||
| 2055 | /* does lookup, returns the object with parent locked */ | 2041 | /* does lookup, returns the object with parent locked */ |
| 2056 | struct dentry *kern_path_locked(const char *name, struct path *path) | 2042 | struct dentry *kern_path_locked(const char *name, struct path *path) |
| 2057 | { | 2043 | { |
| @@ -2089,9 +2075,15 @@ out: | |||
| 2089 | int kern_path(const char *name, unsigned int flags, struct path *path) | 2075 | int kern_path(const char *name, unsigned int flags, struct path *path) |
| 2090 | { | 2076 | { |
| 2091 | struct nameidata nd; | 2077 | struct nameidata nd; |
| 2092 | int res = do_path_lookup(AT_FDCWD, name, flags, &nd); | 2078 | struct filename *filename = getname_kernel(name); |
| 2093 | if (!res) | 2079 | int res = PTR_ERR(filename); |
| 2094 | *path = nd.path; | 2080 | |
| 2081 | if (!IS_ERR(filename)) { | ||
| 2082 | res = filename_lookup(AT_FDCWD, filename, flags, &nd); | ||
| 2083 | putname(filename); | ||
| 2084 | if (!res) | ||
| 2085 | *path = nd.path; | ||
| 2086 | } | ||
| 2095 | return res; | 2087 | return res; |
| 2096 | } | 2088 | } |
| 2097 | EXPORT_SYMBOL(kern_path); | 2089 | EXPORT_SYMBOL(kern_path); |
| @@ -2108,15 +2100,22 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt, | |||
| 2108 | const char *name, unsigned int flags, | 2100 | const char *name, unsigned int flags, |
| 2109 | struct path *path) | 2101 | struct path *path) |
| 2110 | { | 2102 | { |
| 2111 | struct nameidata nd; | 2103 | struct filename *filename = getname_kernel(name); |
| 2112 | int err; | 2104 | int err = PTR_ERR(filename); |
| 2113 | nd.root.dentry = dentry; | 2105 | |
| 2114 | nd.root.mnt = mnt; | ||
| 2115 | BUG_ON(flags & LOOKUP_PARENT); | 2106 | BUG_ON(flags & LOOKUP_PARENT); |
| 2116 | /* the first argument of do_path_lookup() is ignored with LOOKUP_ROOT */ | 2107 | |
| 2117 | err = do_path_lookup(AT_FDCWD, name, flags | LOOKUP_ROOT, &nd); | 2108 | /* the first argument of filename_lookup() is ignored with LOOKUP_ROOT */ |
| 2118 | if (!err) | 2109 | if (!IS_ERR(filename)) { |
| 2119 | *path = nd.path; | 2110 | struct nameidata nd; |
| 2111 | nd.root.dentry = dentry; | ||
| 2112 | nd.root.mnt = mnt; | ||
| 2113 | err = filename_lookup(AT_FDCWD, filename, | ||
| 2114 | flags | LOOKUP_ROOT, &nd); | ||
| 2115 | if (!err) | ||
| 2116 | *path = nd.path; | ||
| 2117 | putname(filename); | ||
| 2118 | } | ||
| 2120 | return err; | 2119 | return err; |
| 2121 | } | 2120 | } |
| 2122 | EXPORT_SYMBOL(vfs_path_lookup); | 2121 | EXPORT_SYMBOL(vfs_path_lookup); |
| @@ -2138,9 +2137,7 @@ static struct dentry *lookup_hash(struct nameidata *nd) | |||
| 2138 | * @len: maximum length @len should be interpreted to | 2137 | * @len: maximum length @len should be interpreted to |
| 2139 | * | 2138 | * |
| 2140 | * Note that this routine is purely a helper for filesystem usage and should | 2139 | * Note that this routine is purely a helper for filesystem usage and should |
| 2141 | * not be called by generic code. Also note that by using this function the | 2140 | * not be called by generic code. |
| 2142 | * nameidata argument is passed to the filesystem methods and a filesystem | ||
| 2143 | * using this helper needs to be prepared for that. | ||
| 2144 | */ | 2141 | */ |
| 2145 | struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) | 2142 | struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) |
| 2146 | { | 2143 | { |
| @@ -2341,7 +2338,8 @@ out: | |||
| 2341 | * Returns 0 and "path" will be valid on success; Returns error otherwise. | 2338 | * Returns 0 and "path" will be valid on success; Returns error otherwise. |
| 2342 | */ | 2339 | */ |
| 2343 | static int | 2340 | static int |
| 2344 | path_mountpoint(int dfd, const char *name, struct path *path, unsigned int flags) | 2341 | path_mountpoint(int dfd, const struct filename *name, struct path *path, |
| 2342 | unsigned int flags) | ||
| 2345 | { | 2343 | { |
| 2346 | struct nameidata nd; | 2344 | struct nameidata nd; |
| 2347 | int err; | 2345 | int err; |
| @@ -2370,20 +2368,20 @@ out: | |||
| 2370 | } | 2368 | } |
| 2371 | 2369 | ||
| 2372 | static int | 2370 | static int |
| 2373 | filename_mountpoint(int dfd, struct filename *s, struct path *path, | 2371 | filename_mountpoint(int dfd, struct filename *name, struct path *path, |
| 2374 | unsigned int flags) | 2372 | unsigned int flags) |
| 2375 | { | 2373 | { |
| 2376 | int error; | 2374 | int error; |
| 2377 | if (IS_ERR(s)) | 2375 | if (IS_ERR(name)) |
| 2378 | return PTR_ERR(s); | 2376 | return PTR_ERR(name); |
| 2379 | error = path_mountpoint(dfd, s->name, path, flags | LOOKUP_RCU); | 2377 | error = path_mountpoint(dfd, name, path, flags | LOOKUP_RCU); |
| 2380 | if (unlikely(error == -ECHILD)) | 2378 | if (unlikely(error == -ECHILD)) |
| 2381 | error = path_mountpoint(dfd, s->name, path, flags); | 2379 | error = path_mountpoint(dfd, name, path, flags); |
| 2382 | if (unlikely(error == -ESTALE)) | 2380 | if (unlikely(error == -ESTALE)) |
| 2383 | error = path_mountpoint(dfd, s->name, path, flags | LOOKUP_REVAL); | 2381 | error = path_mountpoint(dfd, name, path, flags | LOOKUP_REVAL); |
| 2384 | if (likely(!error)) | 2382 | if (likely(!error)) |
| 2385 | audit_inode(s, path->dentry, 0); | 2383 | audit_inode(name, path->dentry, 0); |
| 2386 | putname(s); | 2384 | putname(name); |
| 2387 | return error; | 2385 | return error; |
| 2388 | } | 2386 | } |
| 2389 | 2387 | ||
| @@ -3156,7 +3154,7 @@ static int do_tmpfile(int dfd, struct filename *pathname, | |||
| 3156 | static const struct qstr name = QSTR_INIT("/", 1); | 3154 | static const struct qstr name = QSTR_INIT("/", 1); |
| 3157 | struct dentry *dentry, *child; | 3155 | struct dentry *dentry, *child; |
| 3158 | struct inode *dir; | 3156 | struct inode *dir; |
| 3159 | int error = path_lookupat(dfd, pathname->name, | 3157 | int error = path_lookupat(dfd, pathname, |
| 3160 | flags | LOOKUP_DIRECTORY, nd); | 3158 | flags | LOOKUP_DIRECTORY, nd); |
| 3161 | if (unlikely(error)) | 3159 | if (unlikely(error)) |
| 3162 | return error; | 3160 | return error; |
| @@ -3229,7 +3227,7 @@ static struct file *path_openat(int dfd, struct filename *pathname, | |||
| 3229 | goto out; | 3227 | goto out; |
| 3230 | } | 3228 | } |
| 3231 | 3229 | ||
| 3232 | error = path_init(dfd, pathname->name, flags, nd); | 3230 | error = path_init(dfd, pathname, flags, nd); |
| 3233 | if (unlikely(error)) | 3231 | if (unlikely(error)) |
| 3234 | goto out; | 3232 | goto out; |
| 3235 | 3233 | ||
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index e907c8cf732e..c3929fb2ab26 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c | |||
| @@ -265,7 +265,7 @@ ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t | |||
| 265 | 265 | ||
| 266 | return -EINVAL; | 266 | return -EINVAL; |
| 267 | #else | 267 | #else |
| 268 | VM_BUG_ON(iocb->ki_nbytes != PAGE_SIZE); | 268 | VM_BUG_ON(iov_iter_count(iter) != PAGE_SIZE); |
| 269 | 269 | ||
| 270 | if (rw == READ) | 270 | if (rw == READ) |
| 271 | return nfs_file_direct_read(iocb, iter, pos); | 271 | return nfs_file_direct_read(iocb, iter, pos); |
| @@ -393,7 +393,7 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq, bool write) | |||
| 393 | long res = (long) dreq->error; | 393 | long res = (long) dreq->error; |
| 394 | if (!res) | 394 | if (!res) |
| 395 | res = (long) dreq->count; | 395 | res = (long) dreq->count; |
| 396 | aio_complete(dreq->iocb, res, 0); | 396 | dreq->iocb->ki_complete(dreq->iocb, res, 0); |
| 397 | } | 397 | } |
| 398 | 398 | ||
| 399 | complete_all(&dreq->completion); | 399 | complete_all(&dreq->completion); |
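The nfs_direct_complete() hunk calls the completion hook stored in the iocb instead of the removed aio_complete() helper. A sketch of that callback-through-the-request pattern, with invented names:

#include <stdio.h>

struct request {
        long result;
        void (*complete)(struct request *req, long res);
};

static void my_complete(struct request *req, long res)
{
        printf("request %p finished: %ld bytes\n", (void *)req, res);
}

static void finish_io(struct request *req)
{
        if (req->complete)              /* invoke whatever the submitter installed */
                req->complete(req, req->result);
}

int main(void)
{
        struct request r = { .result = 4096, .complete = my_complete };

        finish_io(&r);
        return 0;
}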
diff --git a/fs/nfs/file.c b/fs/nfs/file.c index e679d24c39d3..37b15582e0de 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c | |||
| @@ -26,7 +26,6 @@ | |||
| 26 | #include <linux/nfs_mount.h> | 26 | #include <linux/nfs_mount.h> |
| 27 | #include <linux/mm.h> | 27 | #include <linux/mm.h> |
| 28 | #include <linux/pagemap.h> | 28 | #include <linux/pagemap.h> |
| 29 | #include <linux/aio.h> | ||
| 30 | #include <linux/gfp.h> | 29 | #include <linux/gfp.h> |
| 31 | #include <linux/swap.h> | 30 | #include <linux/swap.h> |
| 32 | 31 | ||
diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 849ed784d6ac..759931088094 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c | |||
| @@ -1876,11 +1876,6 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page) | |||
| 1876 | * request from the inode / page_private pointer and | 1876 | * request from the inode / page_private pointer and |
| 1877 | * release it */ | 1877 | * release it */ |
| 1878 | nfs_inode_remove_request(req); | 1878 | nfs_inode_remove_request(req); |
| 1879 | /* | ||
| 1880 | * In case nfs_inode_remove_request has marked the | ||
| 1881 | * page as being dirty | ||
| 1882 | */ | ||
| 1883 | cancel_dirty_page(page, PAGE_CACHE_SIZE); | ||
| 1884 | nfs_unlock_and_release_request(req); | 1879 | nfs_unlock_and_release_request(req); |
| 1885 | } | 1880 | } |
| 1886 | 1881 | ||
diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c index cdbc78c72542..03d647bf195d 100644 --- a/fs/nfsd/blocklayout.c +++ b/fs/nfsd/blocklayout.c | |||
| @@ -137,7 +137,7 @@ nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp, | |||
| 137 | seg->offset = iomap.offset; | 137 | seg->offset = iomap.offset; |
| 138 | seg->length = iomap.length; | 138 | seg->length = iomap.length; |
| 139 | 139 | ||
| 140 | dprintk("GET: %lld:%lld %d\n", bex->foff, bex->len, bex->es); | 140 | dprintk("GET: 0x%llx:0x%llx %d\n", bex->foff, bex->len, bex->es); |
| 141 | return 0; | 141 | return 0; |
| 142 | 142 | ||
| 143 | out_error: | 143 | out_error: |
diff --git a/fs/nfsd/blocklayoutxdr.c b/fs/nfsd/blocklayoutxdr.c index 9da89fddab33..9aa2796da90d 100644 --- a/fs/nfsd/blocklayoutxdr.c +++ b/fs/nfsd/blocklayoutxdr.c | |||
| @@ -122,19 +122,19 @@ nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp, | |||
| 122 | 122 | ||
| 123 | p = xdr_decode_hyper(p, &bex.foff); | 123 | p = xdr_decode_hyper(p, &bex.foff); |
| 124 | if (bex.foff & (block_size - 1)) { | 124 | if (bex.foff & (block_size - 1)) { |
| 125 | dprintk("%s: unaligned offset %lld\n", | 125 | dprintk("%s: unaligned offset 0x%llx\n", |
| 126 | __func__, bex.foff); | 126 | __func__, bex.foff); |
| 127 | goto fail; | 127 | goto fail; |
| 128 | } | 128 | } |
| 129 | p = xdr_decode_hyper(p, &bex.len); | 129 | p = xdr_decode_hyper(p, &bex.len); |
| 130 | if (bex.len & (block_size - 1)) { | 130 | if (bex.len & (block_size - 1)) { |
| 131 | dprintk("%s: unaligned length %lld\n", | 131 | dprintk("%s: unaligned length 0x%llx\n", |
| 132 | __func__, bex.foff); | 132 | __func__, bex.foff); |
| 133 | goto fail; | 133 | goto fail; |
| 134 | } | 134 | } |
| 135 | p = xdr_decode_hyper(p, &bex.soff); | 135 | p = xdr_decode_hyper(p, &bex.soff); |
| 136 | if (bex.soff & (block_size - 1)) { | 136 | if (bex.soff & (block_size - 1)) { |
| 137 | dprintk("%s: unaligned disk offset %lld\n", | 137 | dprintk("%s: unaligned disk offset 0x%llx\n", |
| 138 | __func__, bex.soff); | 138 | __func__, bex.soff); |
| 139 | goto fail; | 139 | goto fail; |
| 140 | } | 140 | } |
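The dprintk changes switch the offsets to hex, which matches the power-of-two alignment masks being tested. A tiny sketch of that check, assuming a power-of-two block size; the names are illustrative:

#include <inttypes.h>
#include <stdio.h>

static int check_aligned(uint64_t off, uint32_t block_size)
{
        if (off & (block_size - 1)) {   /* block_size is assumed to be a power of two */
                printf("unaligned offset 0x%" PRIx64 "\n", off);
                return -1;
        }
        return 0;
}

int main(void)
{
        check_aligned(0x1000, 4096);    /* aligned: silent */
        check_aligned(0x1001, 4096);    /* rejected, reported in hex */
        return 0;
}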
diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c index 3c1bfa155571..6904213a4363 100644 --- a/fs/nfsd/nfs4layouts.c +++ b/fs/nfsd/nfs4layouts.c | |||
| @@ -118,7 +118,7 @@ void nfsd4_setup_layout_type(struct svc_export *exp) | |||
| 118 | { | 118 | { |
| 119 | struct super_block *sb = exp->ex_path.mnt->mnt_sb; | 119 | struct super_block *sb = exp->ex_path.mnt->mnt_sb; |
| 120 | 120 | ||
| 121 | if (exp->ex_flags & NFSEXP_NOPNFS) | 121 | if (!(exp->ex_flags & NFSEXP_PNFS)) |
| 122 | return; | 122 | return; |
| 123 | 123 | ||
| 124 | if (sb->s_export_op->get_uuid && | 124 | if (sb->s_export_op->get_uuid && |
| @@ -440,15 +440,14 @@ nfsd4_return_file_layout(struct nfs4_layout *lp, struct nfsd4_layout_seg *seg, | |||
| 440 | list_move_tail(&lp->lo_perstate, reaplist); | 440 | list_move_tail(&lp->lo_perstate, reaplist); |
| 441 | return; | 441 | return; |
| 442 | } | 442 | } |
| 443 | end = seg->offset; | 443 | lo->offset = layout_end(seg); |
| 444 | } else { | 444 | } else { |
| 445 | /* retain the whole layout segment on a split. */ | 445 | /* retain the whole layout segment on a split. */ |
| 446 | if (layout_end(seg) < end) { | 446 | if (layout_end(seg) < end) { |
| 447 | dprintk("%s: split not supported\n", __func__); | 447 | dprintk("%s: split not supported\n", __func__); |
| 448 | return; | 448 | return; |
| 449 | } | 449 | } |
| 450 | 450 | end = seg->offset; | |
| 451 | lo->offset = layout_end(seg); | ||
| 452 | } | 451 | } |
| 453 | 452 | ||
| 454 | layout_update_len(lo, end); | 453 | layout_update_len(lo, end); |
| @@ -513,6 +512,9 @@ nfsd4_return_client_layouts(struct svc_rqst *rqstp, | |||
| 513 | 512 | ||
| 514 | spin_lock(&clp->cl_lock); | 513 | spin_lock(&clp->cl_lock); |
| 515 | list_for_each_entry_safe(ls, n, &clp->cl_lo_states, ls_perclnt) { | 514 | list_for_each_entry_safe(ls, n, &clp->cl_lo_states, ls_perclnt) { |
| 515 | if (ls->ls_layout_type != lrp->lr_layout_type) | ||
| 516 | continue; | ||
| 517 | |||
| 516 | if (lrp->lr_return_type == RETURN_FSID && | 518 | if (lrp->lr_return_type == RETURN_FSID && |
| 517 | !fh_fsid_match(&ls->ls_stid.sc_file->fi_fhandle, | 519 | !fh_fsid_match(&ls->ls_stid.sc_file->fi_fhandle, |
| 518 | &cstate->current_fh.fh_handle)) | 520 | &cstate->current_fh.fh_handle)) |
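The added ls_layout_type test skips layout states of the wrong type before the fsid comparison runs. A generic sketch of filtering a list walk with continue; the structure and fields below are invented for illustration:

struct entry {
        int type;
        int busy;
};

int count_matching(const struct entry *e, int n, int wanted_type)
{
        int matches = 0;

        for (int i = 0; i < n; i++) {
                if (e[i].type != wanted_type)
                        continue;       /* wrong type: skip before any other test */
                if (e[i].busy)
                        continue;       /* remaining per-entry conditions follow */
                matches++;
        }
        return matches;
}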
| @@ -587,7 +589,7 @@ nfsd4_cb_layout_fail(struct nfs4_layout_stateid *ls) | |||
| 587 | 589 | ||
| 588 | rpc_ntop((struct sockaddr *)&clp->cl_addr, addr_str, sizeof(addr_str)); | 590 | rpc_ntop((struct sockaddr *)&clp->cl_addr, addr_str, sizeof(addr_str)); |
| 589 | 591 | ||
| 590 | nfsd4_cb_layout_fail(ls); | 592 | trace_layout_recall_fail(&ls->ls_stid.sc_stateid); |
| 591 | 593 | ||
| 592 | printk(KERN_WARNING | 594 | printk(KERN_WARNING |
| 593 | "nfsd: client %s failed to respond to layout recall. " | 595 | "nfsd: client %s failed to respond to layout recall. " |
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index d30bea8d0277..92b9d97aff4f 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c | |||
| @@ -1237,8 +1237,8 @@ nfsd4_getdeviceinfo(struct svc_rqst *rqstp, | |||
| 1237 | nfserr = ops->proc_getdeviceinfo(exp->ex_path.mnt->mnt_sb, gdp); | 1237 | nfserr = ops->proc_getdeviceinfo(exp->ex_path.mnt->mnt_sb, gdp); |
| 1238 | 1238 | ||
| 1239 | gdp->gd_notify_types &= ops->notify_types; | 1239 | gdp->gd_notify_types &= ops->notify_types; |
| 1240 | exp_put(exp); | ||
| 1241 | out: | 1240 | out: |
| 1241 | exp_put(exp); | ||
| 1242 | return nfserr; | 1242 | return nfserr; |
| 1243 | } | 1243 | } |
| 1244 | 1244 | ||
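Moving exp_put() below the out: label makes the export reference drop on the error path as well as on success. A sketch of that single-exit cleanup shape, with placeholder names:

#include <errno.h>

struct obj { int refs; };

static void obj_get(struct obj *o) { o->refs++; }
static void obj_put(struct obj *o) { o->refs--; }

int use_obj(struct obj *o, int fail_early)
{
        int err = 0;

        obj_get(o);
        if (fail_early) {
                err = -EINVAL;
                goto out;               /* the reference is still dropped below */
        }
        /* ... work with the object ... */
out:
        obj_put(o);                     /* runs on success and on error alike */
        return err;
}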
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index d2f2c37dc2db..8ba1d888f1e6 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c | |||
| @@ -3221,7 +3221,7 @@ alloc_init_open_stateowner(unsigned int strhashval, struct nfsd4_open *open, | |||
| 3221 | } else | 3221 | } else |
| 3222 | nfs4_free_openowner(&oo->oo_owner); | 3222 | nfs4_free_openowner(&oo->oo_owner); |
| 3223 | spin_unlock(&clp->cl_lock); | 3223 | spin_unlock(&clp->cl_lock); |
| 3224 | return oo; | 3224 | return ret; |
| 3225 | } | 3225 | } |
| 3226 | 3226 | ||
| 3227 | static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *open) { | 3227 | static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *open) { |
| @@ -5062,7 +5062,7 @@ alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, | |||
| 5062 | } else | 5062 | } else |
| 5063 | nfs4_free_lockowner(&lo->lo_owner); | 5063 | nfs4_free_lockowner(&lo->lo_owner); |
| 5064 | spin_unlock(&clp->cl_lock); | 5064 | spin_unlock(&clp->cl_lock); |
| 5065 | return lo; | 5065 | return ret; |
| 5066 | } | 5066 | } |
| 5067 | 5067 | ||
| 5068 | static void | 5068 | static void |
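Both alloc_init_*_stateowner() fixes return ret, the owner that actually ended up in the client's table, rather than the one just allocated, which may already have been freed after losing the insertion race. A userspace sketch of the find-or-create pattern; the table helper below is a deliberately trivial stand-in:

#include <stdlib.h>

struct owner { int id; };

static struct owner *existing_entry;    /* stand-in for a hash bucket */

/* returns NULL if the candidate was inserted, or the entry already present */
static struct owner *table_insert_locked(struct owner *candidate)
{
        if (existing_entry)
                return existing_entry;
        existing_entry = candidate;
        return NULL;
}

struct owner *find_or_create(int id)
{
        struct owner *nw = malloc(sizeof(*nw));
        struct owner *existing;

        if (!nw)
                return NULL;
        nw->id = id;
        existing = table_insert_locked(nw);
        if (existing) {
                free(nw);               /* lost the race: discard the new one */
                return existing;        /* hand back what the table really holds */
        }
        return nw;
}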
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index df5e66caf100..5fb7e78169a6 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c | |||
| @@ -1562,7 +1562,11 @@ nfsd4_decode_layoutget(struct nfsd4_compoundargs *argp, | |||
| 1562 | p = xdr_decode_hyper(p, &lgp->lg_seg.offset); | 1562 | p = xdr_decode_hyper(p, &lgp->lg_seg.offset); |
| 1563 | p = xdr_decode_hyper(p, &lgp->lg_seg.length); | 1563 | p = xdr_decode_hyper(p, &lgp->lg_seg.length); |
| 1564 | p = xdr_decode_hyper(p, &lgp->lg_minlength); | 1564 | p = xdr_decode_hyper(p, &lgp->lg_minlength); |
| 1565 | nfsd4_decode_stateid(argp, &lgp->lg_sid); | 1565 | |
| 1566 | status = nfsd4_decode_stateid(argp, &lgp->lg_sid); | ||
| 1567 | if (status) | ||
| 1568 | return status; | ||
| 1569 | |||
| 1566 | READ_BUF(4); | 1570 | READ_BUF(4); |
| 1567 | lgp->lg_maxcount = be32_to_cpup(p++); | 1571 | lgp->lg_maxcount = be32_to_cpup(p++); |
| 1568 | 1572 | ||
| @@ -1580,7 +1584,11 @@ nfsd4_decode_layoutcommit(struct nfsd4_compoundargs *argp, | |||
| 1580 | p = xdr_decode_hyper(p, &lcp->lc_seg.offset); | 1584 | p = xdr_decode_hyper(p, &lcp->lc_seg.offset); |
| 1581 | p = xdr_decode_hyper(p, &lcp->lc_seg.length); | 1585 | p = xdr_decode_hyper(p, &lcp->lc_seg.length); |
| 1582 | lcp->lc_reclaim = be32_to_cpup(p++); | 1586 | lcp->lc_reclaim = be32_to_cpup(p++); |
| 1583 | nfsd4_decode_stateid(argp, &lcp->lc_sid); | 1587 | |
| 1588 | status = nfsd4_decode_stateid(argp, &lcp->lc_sid); | ||
| 1589 | if (status) | ||
| 1590 | return status; | ||
| 1591 | |||
| 1584 | READ_BUF(4); | 1592 | READ_BUF(4); |
| 1585 | lcp->lc_newoffset = be32_to_cpup(p++); | 1593 | lcp->lc_newoffset = be32_to_cpup(p++); |
| 1586 | if (lcp->lc_newoffset) { | 1594 | if (lcp->lc_newoffset) { |
| @@ -1628,7 +1636,11 @@ nfsd4_decode_layoutreturn(struct nfsd4_compoundargs *argp, | |||
| 1628 | READ_BUF(16); | 1636 | READ_BUF(16); |
| 1629 | p = xdr_decode_hyper(p, &lrp->lr_seg.offset); | 1637 | p = xdr_decode_hyper(p, &lrp->lr_seg.offset); |
| 1630 | p = xdr_decode_hyper(p, &lrp->lr_seg.length); | 1638 | p = xdr_decode_hyper(p, &lrp->lr_seg.length); |
| 1631 | nfsd4_decode_stateid(argp, &lrp->lr_sid); | 1639 | |
| 1640 | status = nfsd4_decode_stateid(argp, &lrp->lr_sid); | ||
| 1641 | if (status) | ||
| 1642 | return status; | ||
| 1643 | |||
| 1632 | READ_BUF(4); | 1644 | READ_BUF(4); |
| 1633 | lrp->lrf_body_len = be32_to_cpup(p++); | 1645 | lrp->lrf_body_len = be32_to_cpup(p++); |
| 1634 | if (lrp->lrf_body_len > 0) { | 1646 | if (lrp->lrf_body_len > 0) { |
| @@ -4123,7 +4135,7 @@ nfsd4_encode_layoutreturn(struct nfsd4_compoundres *resp, __be32 nfserr, | |||
| 4123 | return nfserr_resource; | 4135 | return nfserr_resource; |
| 4124 | *p++ = cpu_to_be32(lrp->lrs_present); | 4136 | *p++ = cpu_to_be32(lrp->lrs_present); |
| 4125 | if (lrp->lrs_present) | 4137 | if (lrp->lrs_present) |
| 4126 | nfsd4_encode_stateid(xdr, &lrp->lr_sid); | 4138 | return nfsd4_encode_stateid(xdr, &lrp->lr_sid); |
| 4127 | return nfs_ok; | 4139 | return nfs_ok; |
| 4128 | } | 4140 | } |
| 4129 | #endif /* CONFIG_NFSD_PNFS */ | 4141 | #endif /* CONFIG_NFSD_PNFS */ |
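The three decode fixes stop ignoring the status of nfsd4_decode_stateid(), so a malformed stateid aborts the decode instead of letting it continue on garbage. A sketch of the check-every-decode-step pattern, with an invented cursor type:

#include <errno.h>
#include <stddef.h>

struct cursor {
        const unsigned char *p;
        size_t left;
};

static int decode_u32(struct cursor *c, unsigned int *out)
{
        if (c->left < 4)
                return -EBADMSG;        /* short buffer: stop before reading past it */
        *out = (unsigned)c->p[0] << 24 | c->p[1] << 16 | c->p[2] << 8 | c->p[3];
        c->p += 4;
        c->left -= 4;
        return 0;
}

int decode_request(struct cursor *c, unsigned int *a, unsigned int *b)
{
        int status = decode_u32(c, a);

        if (status)
                return status;          /* propagate instead of decoding on garbage */
        return decode_u32(c, b);
}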
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index 83a9694ec485..46ec934f5dee 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c | |||
| @@ -165,13 +165,17 @@ int nfsd_reply_cache_init(void) | |||
| 165 | { | 165 | { |
| 166 | unsigned int hashsize; | 166 | unsigned int hashsize; |
| 167 | unsigned int i; | 167 | unsigned int i; |
| 168 | int status = 0; | ||
| 168 | 169 | ||
| 169 | max_drc_entries = nfsd_cache_size_limit(); | 170 | max_drc_entries = nfsd_cache_size_limit(); |
| 170 | atomic_set(&num_drc_entries, 0); | 171 | atomic_set(&num_drc_entries, 0); |
| 171 | hashsize = nfsd_hashsize(max_drc_entries); | 172 | hashsize = nfsd_hashsize(max_drc_entries); |
| 172 | maskbits = ilog2(hashsize); | 173 | maskbits = ilog2(hashsize); |
| 173 | 174 | ||
| 174 | register_shrinker(&nfsd_reply_cache_shrinker); | 175 | status = register_shrinker(&nfsd_reply_cache_shrinker); |
| 176 | if (status) | ||
| 177 | return status; | ||
| 178 | |||
| 175 | drc_slab = kmem_cache_create("nfsd_drc", sizeof(struct svc_cacherep), | 179 | drc_slab = kmem_cache_create("nfsd_drc", sizeof(struct svc_cacherep), |
| 176 | 0, 0, NULL); | 180 | 0, 0, NULL); |
| 177 | if (!drc_slab) | 181 | if (!drc_slab) |
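nfsd_reply_cache_init() now propagates a register_shrinker() failure instead of assuming it succeeds. A sketch of the same early-bailout shape; the registration helper below is a stub, not the kernel API:

#include <errno.h>
#include <stdbool.h>

static bool registered;

static int register_callback(void)      /* stub: registration helpers can fail too */
{
        if (registered)
                return -EEXIST;
        registered = true;
        return 0;
}

int subsystem_init(void)
{
        int status = register_callback();

        if (status)
                return status;          /* bail out before allocating anything else */
        /* ... allocate caches, hash tables, ... */
        return 0;
}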
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 8b5969538f39..ab4987bc637f 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c | |||
| @@ -26,7 +26,7 @@ | |||
| 26 | #include <linux/mpage.h> | 26 | #include <linux/mpage.h> |
| 27 | #include <linux/pagemap.h> | 27 | #include <linux/pagemap.h> |
| 28 | #include <linux/writeback.h> | 28 | #include <linux/writeback.h> |
| 29 | #include <linux/aio.h> | 29 | #include <linux/uio.h> |
| 30 | #include "nilfs.h" | 30 | #include "nilfs.h" |
| 31 | #include "btnode.h" | 31 | #include "btnode.h" |
| 32 | #include "segment.h" | 32 | #include "segment.h" |
diff --git a/fs/ntfs/Makefile b/fs/ntfs/Makefile index 36ae529511c4..2ff263e6d363 100644 --- a/fs/ntfs/Makefile +++ b/fs/ntfs/Makefile | |||
| @@ -8,7 +8,7 @@ ntfs-y := aops.o attrib.o collate.o compress.o debug.o dir.o file.o \ | |||
| 8 | 8 | ||
| 9 | ntfs-$(CONFIG_NTFS_RW) += bitmap.o lcnalloc.o logfile.o quota.o usnjrnl.o | 9 | ntfs-$(CONFIG_NTFS_RW) += bitmap.o lcnalloc.o logfile.o quota.o usnjrnl.o |
| 10 | 10 | ||
| 11 | ccflags-y := -DNTFS_VERSION=\"2.1.31\" | 11 | ccflags-y := -DNTFS_VERSION=\"2.1.32\" |
| 12 | ccflags-$(CONFIG_NTFS_DEBUG) += -DDEBUG | 12 | ccflags-$(CONFIG_NTFS_DEBUG) += -DDEBUG |
| 13 | ccflags-$(CONFIG_NTFS_RW) += -DNTFS_RW | 13 | ccflags-$(CONFIG_NTFS_RW) += -DNTFS_RW |
| 14 | 14 | ||
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index 1da9b2d184dc..c1da78dad1af 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c | |||
| @@ -1,7 +1,7 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * file.c - NTFS kernel file operations. Part of the Linux-NTFS project. | 2 | * file.c - NTFS kernel file operations. Part of the Linux-NTFS project. |
| 3 | * | 3 | * |
| 4 | * Copyright (c) 2001-2014 Anton Altaparmakov and Tuxera Inc. | 4 | * Copyright (c) 2001-2015 Anton Altaparmakov and Tuxera Inc. |
| 5 | * | 5 | * |
| 6 | * This program/include file is free software; you can redistribute it and/or | 6 | * This program/include file is free software; you can redistribute it and/or |
| 7 | * modify it under the terms of the GNU General Public License as published | 7 | * modify it under the terms of the GNU General Public License as published |
| @@ -28,7 +28,6 @@ | |||
| 28 | #include <linux/swap.h> | 28 | #include <linux/swap.h> |
| 29 | #include <linux/uio.h> | 29 | #include <linux/uio.h> |
| 30 | #include <linux/writeback.h> | 30 | #include <linux/writeback.h> |
| 31 | #include <linux/aio.h> | ||
| 32 | 31 | ||
| 33 | #include <asm/page.h> | 32 | #include <asm/page.h> |
| 34 | #include <asm/uaccess.h> | 33 | #include <asm/uaccess.h> |
| @@ -329,62 +328,168 @@ err_out: | |||
| 329 | return err; | 328 | return err; |
| 330 | } | 329 | } |
| 331 | 330 | ||
| 332 | /** | 331 | static ssize_t ntfs_prepare_file_for_write(struct file *file, loff_t *ppos, |
| 333 | * ntfs_fault_in_pages_readable - | 332 | size_t *count) |
| 334 | * | ||
| 335 | * Fault a number of userspace pages into pagetables. | ||
| 336 | * | ||
| 337 | * Unlike include/linux/pagemap.h::fault_in_pages_readable(), this one copes | ||
| 338 | * with more than two userspace pages as well as handling the single page case | ||
| 339 | * elegantly. | ||
| 340 | * | ||
| 341 | * If you find this difficult to understand, then think of the while loop being | ||
| 342 | * the following code, except that we do without the integer variable ret: | ||
| 343 | * | ||
| 344 | * do { | ||
| 345 | * ret = __get_user(c, uaddr); | ||
| 346 | * uaddr += PAGE_SIZE; | ||
| 347 | * } while (!ret && uaddr < end); | ||
| 348 | * | ||
| 349 | * Note, the final __get_user() may well run out-of-bounds of the user buffer, | ||
| 350 | * but _not_ out-of-bounds of the page the user buffer belongs to, and since | ||
| 351 | * this is only a read and not a write, and since it is still in the same page, | ||
| 352 | * it should not matter and this makes the code much simpler. | ||
| 353 | */ | ||
| 354 | static inline void ntfs_fault_in_pages_readable(const char __user *uaddr, | ||
| 355 | int bytes) | ||
| 356 | { | 333 | { |
| 357 | const char __user *end; | 334 | loff_t pos; |
| 358 | volatile char c; | 335 | s64 end, ll; |
| 359 | 336 | ssize_t err; | |
| 360 | /* Set @end to the first byte outside the last page we care about. */ | 337 | unsigned long flags; |
| 361 | end = (const char __user*)PAGE_ALIGN((unsigned long)uaddr + bytes); | 338 | struct inode *vi = file_inode(file); |
| 362 | 339 | ntfs_inode *base_ni, *ni = NTFS_I(vi); | |
| 363 | while (!__get_user(c, uaddr) && (uaddr += PAGE_SIZE, uaddr < end)) | 340 | ntfs_volume *vol = ni->vol; |
| 364 | ; | ||
| 365 | } | ||
| 366 | |||
| 367 | /** | ||
| 368 | * ntfs_fault_in_pages_readable_iovec - | ||
| 369 | * | ||
| 370 | * Same as ntfs_fault_in_pages_readable() but operates on an array of iovecs. | ||
| 371 | */ | ||
| 372 | static inline void ntfs_fault_in_pages_readable_iovec(const struct iovec *iov, | ||
| 373 | size_t iov_ofs, int bytes) | ||
| 374 | { | ||
| 375 | do { | ||
| 376 | const char __user *buf; | ||
| 377 | unsigned len; | ||
| 378 | 341 | ||
| 379 | buf = iov->iov_base + iov_ofs; | 342 | ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, pos " |
| 380 | len = iov->iov_len - iov_ofs; | 343 | "0x%llx, count 0x%lx.", vi->i_ino, |
| 381 | if (len > bytes) | 344 | (unsigned)le32_to_cpu(ni->type), |
| 382 | len = bytes; | 345 | (unsigned long long)*ppos, (unsigned long)*count); |
| 383 | ntfs_fault_in_pages_readable(buf, len); | 346 | /* We can write back this queue in page reclaim. */ |
| 384 | bytes -= len; | 347 | current->backing_dev_info = inode_to_bdi(vi); |
| 385 | iov++; | 348 | err = generic_write_checks(file, ppos, count, S_ISBLK(vi->i_mode)); |
| 386 | iov_ofs = 0; | 349 | if (unlikely(err)) |
| 387 | } while (bytes); | 350 | goto out; |
| 351 | /* | ||
| 352 | * All checks have passed. Before we start doing any writing we want | ||
| 353 | * to abort any totally illegal writes. | ||
| 354 | */ | ||
| 355 | BUG_ON(NInoMstProtected(ni)); | ||
| 356 | BUG_ON(ni->type != AT_DATA); | ||
| 357 | /* If file is encrypted, deny access, just like NT4. */ | ||
| 358 | if (NInoEncrypted(ni)) { | ||
| 359 | /* Only $DATA attributes can be encrypted. */ | ||
| 360 | /* | ||
| 361 | * Reminder for later: Encrypted files are _always_ | ||
| 362 | * non-resident so that the content can always be encrypted. | ||
| 363 | */ | ||
| 364 | ntfs_debug("Denying write access to encrypted file."); | ||
| 365 | err = -EACCES; | ||
| 366 | goto out; | ||
| 367 | } | ||
| 368 | if (NInoCompressed(ni)) { | ||
| 369 | /* Only unnamed $DATA attribute can be compressed. */ | ||
| 370 | BUG_ON(ni->name_len); | ||
| 371 | /* | ||
| 372 | * Reminder for later: If resident, the data is not actually | ||
| 373 | * compressed. Only on the switch to non-resident does | ||
| 374 | * compression kick in. This is in contrast to encrypted files | ||
| 375 | * (see above). | ||
| 376 | */ | ||
| 377 | ntfs_error(vi->i_sb, "Writing to compressed files is not " | ||
| 378 | "implemented yet. Sorry."); | ||
| 379 | err = -EOPNOTSUPP; | ||
| 380 | goto out; | ||
| 381 | } | ||
| 382 | if (*count == 0) | ||
| 383 | goto out; | ||
| 384 | base_ni = ni; | ||
| 385 | if (NInoAttr(ni)) | ||
| 386 | base_ni = ni->ext.base_ntfs_ino; | ||
| 387 | err = file_remove_suid(file); | ||
| 388 | if (unlikely(err)) | ||
| 389 | goto out; | ||
| 390 | /* | ||
| 391 | * Our ->update_time method always succeeds thus file_update_time() | ||
| 392 | * cannot fail either so there is no need to check the return code. | ||
| 393 | */ | ||
| 394 | file_update_time(file); | ||
| 395 | pos = *ppos; | ||
| 396 | /* The first byte after the last cluster being written to. */ | ||
| 397 | end = (pos + *count + vol->cluster_size_mask) & | ||
| 398 | ~(u64)vol->cluster_size_mask; | ||
| 399 | /* | ||
| 400 | * If the write goes beyond the allocated size, extend the allocation | ||
| 401 | * to cover the whole of the write, rounded up to the nearest cluster. | ||
| 402 | */ | ||
| 403 | read_lock_irqsave(&ni->size_lock, flags); | ||
| 404 | ll = ni->allocated_size; | ||
| 405 | read_unlock_irqrestore(&ni->size_lock, flags); | ||
| 406 | if (end > ll) { | ||
| 407 | /* | ||
| 408 | * Extend the allocation without changing the data size. | ||
| 409 | * | ||
| 410 | * Note we ensure the allocation is big enough to at least | ||
| 411 | * write some data but we do not require the allocation to be | ||
| 412 | * complete, i.e. it may be partial. | ||
| 413 | */ | ||
| 414 | ll = ntfs_attr_extend_allocation(ni, end, -1, pos); | ||
| 415 | if (likely(ll >= 0)) { | ||
| 416 | BUG_ON(pos >= ll); | ||
| 417 | /* If the extension was partial truncate the write. */ | ||
| 418 | if (end > ll) { | ||
| 419 | ntfs_debug("Truncating write to inode 0x%lx, " | ||
| 420 | "attribute type 0x%x, because " | ||
| 421 | "the allocation was only " | ||
| 422 | "partially extended.", | ||
| 423 | vi->i_ino, (unsigned) | ||
| 424 | le32_to_cpu(ni->type)); | ||
| 425 | *count = ll - pos; | ||
| 426 | } | ||
| 427 | } else { | ||
| 428 | err = ll; | ||
| 429 | read_lock_irqsave(&ni->size_lock, flags); | ||
| 430 | ll = ni->allocated_size; | ||
| 431 | read_unlock_irqrestore(&ni->size_lock, flags); | ||
| 432 | /* Perform a partial write if possible or fail. */ | ||
| 433 | if (pos < ll) { | ||
| 434 | ntfs_debug("Truncating write to inode 0x%lx " | ||
| 435 | "attribute type 0x%x, because " | ||
| 436 | "extending the allocation " | ||
| 437 | "failed (error %d).", | ||
| 438 | vi->i_ino, (unsigned) | ||
| 439 | le32_to_cpu(ni->type), | ||
| 440 | (int)-err); | ||
| 441 | *count = ll - pos; | ||
| 442 | } else { | ||
| 443 | if (err != -ENOSPC) | ||
| 444 | ntfs_error(vi->i_sb, "Cannot perform " | ||
| 445 | "write to inode " | ||
| 446 | "0x%lx, attribute " | ||
| 447 | "type 0x%x, because " | ||
| 448 | "extending the " | ||
| 449 | "allocation failed " | ||
| 450 | "(error %ld).", | ||
| 451 | vi->i_ino, (unsigned) | ||
| 452 | le32_to_cpu(ni->type), | ||
| 453 | (long)-err); | ||
| 454 | else | ||
| 455 | ntfs_debug("Cannot perform write to " | ||
| 456 | "inode 0x%lx, " | ||
| 457 | "attribute type 0x%x, " | ||
| 458 | "because there is not " | ||
| 459 | "space left.", | ||
| 460 | vi->i_ino, (unsigned) | ||
| 461 | le32_to_cpu(ni->type)); | ||
| 462 | goto out; | ||
| 463 | } | ||
| 464 | } | ||
| 465 | } | ||
| 466 | /* | ||
| 467 | * If the write starts beyond the initialized size, extend it up to the | ||
| 468 | * beginning of the write and initialize all non-sparse space between | ||
| 469 | * the old initialized size and the new one. This automatically also | ||
| 470 | * increments the vfs inode->i_size to keep it above or equal to the | ||
| 471 | * initialized_size. | ||
| 472 | */ | ||
| 473 | read_lock_irqsave(&ni->size_lock, flags); | ||
| 474 | ll = ni->initialized_size; | ||
| 475 | read_unlock_irqrestore(&ni->size_lock, flags); | ||
| 476 | if (pos > ll) { | ||
| 477 | /* | ||
| 478 | * Wait for ongoing direct i/o to complete before proceeding. | ||
| 479 | * New direct i/o cannot start as we hold i_mutex. | ||
| 480 | */ | ||
| 481 | inode_dio_wait(vi); | ||
| 482 | err = ntfs_attr_extend_initialized(ni, pos); | ||
| 483 | if (unlikely(err < 0)) | ||
| 484 | ntfs_error(vi->i_sb, "Cannot perform write to inode " | ||
| 485 | "0x%lx, attribute type 0x%x, because " | ||
| 486 | "extending the initialized size " | ||
| 487 | "failed (error %d).", vi->i_ino, | ||
| 488 | (unsigned)le32_to_cpu(ni->type), | ||
| 489 | (int)-err); | ||
| 490 | } | ||
| 491 | out: | ||
| 492 | return err; | ||
| 388 | } | 493 | } |
| 389 | 494 | ||
| 390 | /** | 495 | /** |
| @@ -421,8 +526,8 @@ static inline int __ntfs_grab_cache_pages(struct address_space *mapping, | |||
| 421 | goto err_out; | 526 | goto err_out; |
| 422 | } | 527 | } |
| 423 | } | 528 | } |
| 424 | err = add_to_page_cache_lru(*cached_page, mapping, index, | 529 | err = add_to_page_cache_lru(*cached_page, mapping, |
| 425 | GFP_KERNEL); | 530 | index, GFP_KERNEL); |
| 426 | if (unlikely(err)) { | 531 | if (unlikely(err)) { |
| 427 | if (err == -EEXIST) | 532 | if (err == -EEXIST) |
| 428 | continue; | 533 | continue; |
| @@ -1268,180 +1373,6 @@ rl_not_mapped_enoent: | |||
| 1268 | return err; | 1373 | return err; |
| 1269 | } | 1374 | } |
| 1270 | 1375 | ||
| 1271 | /* | ||
| 1272 | * Copy as much as we can into the pages and return the number of bytes which | ||
| 1273 | * were successfully copied. If a fault is encountered then clear the pages | ||
| 1274 | * out to (ofs + bytes) and return the number of bytes which were copied. | ||
| 1275 | */ | ||
| 1276 | static inline size_t ntfs_copy_from_user(struct page **pages, | ||
| 1277 | unsigned nr_pages, unsigned ofs, const char __user *buf, | ||
| 1278 | size_t bytes) | ||
| 1279 | { | ||
| 1280 | struct page **last_page = pages + nr_pages; | ||
| 1281 | char *addr; | ||
| 1282 | size_t total = 0; | ||
| 1283 | unsigned len; | ||
| 1284 | int left; | ||
| 1285 | |||
| 1286 | do { | ||
| 1287 | len = PAGE_CACHE_SIZE - ofs; | ||
| 1288 | if (len > bytes) | ||
| 1289 | len = bytes; | ||
| 1290 | addr = kmap_atomic(*pages); | ||
| 1291 | left = __copy_from_user_inatomic(addr + ofs, buf, len); | ||
| 1292 | kunmap_atomic(addr); | ||
| 1293 | if (unlikely(left)) { | ||
| 1294 | /* Do it the slow way. */ | ||
| 1295 | addr = kmap(*pages); | ||
| 1296 | left = __copy_from_user(addr + ofs, buf, len); | ||
| 1297 | kunmap(*pages); | ||
| 1298 | if (unlikely(left)) | ||
| 1299 | goto err_out; | ||
| 1300 | } | ||
| 1301 | total += len; | ||
| 1302 | bytes -= len; | ||
| 1303 | if (!bytes) | ||
| 1304 | break; | ||
| 1305 | buf += len; | ||
| 1306 | ofs = 0; | ||
| 1307 | } while (++pages < last_page); | ||
| 1308 | out: | ||
| 1309 | return total; | ||
| 1310 | err_out: | ||
| 1311 | total += len - left; | ||
| 1312 | /* Zero the rest of the target like __copy_from_user(). */ | ||
| 1313 | while (++pages < last_page) { | ||
| 1314 | bytes -= len; | ||
| 1315 | if (!bytes) | ||
| 1316 | break; | ||
| 1317 | len = PAGE_CACHE_SIZE; | ||
| 1318 | if (len > bytes) | ||
| 1319 | len = bytes; | ||
| 1320 | zero_user(*pages, 0, len); | ||
| 1321 | } | ||
| 1322 | goto out; | ||
| 1323 | } | ||
| 1324 | |||
| 1325 | static size_t __ntfs_copy_from_user_iovec_inatomic(char *vaddr, | ||
| 1326 | const struct iovec *iov, size_t iov_ofs, size_t bytes) | ||
| 1327 | { | ||
| 1328 | size_t total = 0; | ||
| 1329 | |||
| 1330 | while (1) { | ||
| 1331 | const char __user *buf = iov->iov_base + iov_ofs; | ||
| 1332 | unsigned len; | ||
| 1333 | size_t left; | ||
| 1334 | |||
| 1335 | len = iov->iov_len - iov_ofs; | ||
| 1336 | if (len > bytes) | ||
| 1337 | len = bytes; | ||
| 1338 | left = __copy_from_user_inatomic(vaddr, buf, len); | ||
| 1339 | total += len; | ||
| 1340 | bytes -= len; | ||
| 1341 | vaddr += len; | ||
| 1342 | if (unlikely(left)) { | ||
| 1343 | total -= left; | ||
| 1344 | break; | ||
| 1345 | } | ||
| 1346 | if (!bytes) | ||
| 1347 | break; | ||
| 1348 | iov++; | ||
| 1349 | iov_ofs = 0; | ||
| 1350 | } | ||
| 1351 | return total; | ||
| 1352 | } | ||
| 1353 | |||
| 1354 | static inline void ntfs_set_next_iovec(const struct iovec **iovp, | ||
| 1355 | size_t *iov_ofsp, size_t bytes) | ||
| 1356 | { | ||
| 1357 | const struct iovec *iov = *iovp; | ||
| 1358 | size_t iov_ofs = *iov_ofsp; | ||
| 1359 | |||
| 1360 | while (bytes) { | ||
| 1361 | unsigned len; | ||
| 1362 | |||
| 1363 | len = iov->iov_len - iov_ofs; | ||
| 1364 | if (len > bytes) | ||
| 1365 | len = bytes; | ||
| 1366 | bytes -= len; | ||
| 1367 | iov_ofs += len; | ||
| 1368 | if (iov->iov_len == iov_ofs) { | ||
| 1369 | iov++; | ||
| 1370 | iov_ofs = 0; | ||
| 1371 | } | ||
| 1372 | } | ||
| 1373 | *iovp = iov; | ||
| 1374 | *iov_ofsp = iov_ofs; | ||
| 1375 | } | ||
| 1376 | |||
| 1377 | /* | ||
| 1378 | * This has the same side-effects and return value as ntfs_copy_from_user(). | ||
| 1379 | * The difference is that on a fault we need to memset the remainder of the | ||
| 1380 | * pages (out to offset + bytes), to emulate ntfs_copy_from_user()'s | ||
| 1381 | * single-segment behaviour. | ||
| 1382 | * | ||
| 1383 | * We call the same helper (__ntfs_copy_from_user_iovec_inatomic()) both when | ||
| 1384 | * atomic and when not atomic. This is ok because it calls | ||
| 1385 | * __copy_from_user_inatomic() and it is ok to call this when non-atomic. In | ||
| 1386 | * fact, the only difference between __copy_from_user_inatomic() and | ||
| 1387 | * __copy_from_user() is that the latter calls might_sleep() and the former | ||
| 1388 | * should not zero the tail of the buffer on error. And on many architectures | ||
| 1389 | * __copy_from_user_inatomic() is just defined to __copy_from_user() so it | ||
| 1390 | * makes no difference at all on those architectures. | ||
| 1391 | */ | ||
| 1392 | static inline size_t ntfs_copy_from_user_iovec(struct page **pages, | ||
| 1393 | unsigned nr_pages, unsigned ofs, const struct iovec **iov, | ||
| 1394 | size_t *iov_ofs, size_t bytes) | ||
| 1395 | { | ||
| 1396 | struct page **last_page = pages + nr_pages; | ||
| 1397 | char *addr; | ||
| 1398 | size_t copied, len, total = 0; | ||
| 1399 | |||
| 1400 | do { | ||
| 1401 | len = PAGE_CACHE_SIZE - ofs; | ||
| 1402 | if (len > bytes) | ||
| 1403 | len = bytes; | ||
| 1404 | addr = kmap_atomic(*pages); | ||
| 1405 | copied = __ntfs_copy_from_user_iovec_inatomic(addr + ofs, | ||
| 1406 | *iov, *iov_ofs, len); | ||
| 1407 | kunmap_atomic(addr); | ||
| 1408 | if (unlikely(copied != len)) { | ||
| 1409 | /* Do it the slow way. */ | ||
| 1410 | addr = kmap(*pages); | ||
| 1411 | copied = __ntfs_copy_from_user_iovec_inatomic(addr + | ||
| 1412 | ofs, *iov, *iov_ofs, len); | ||
| 1413 | if (unlikely(copied != len)) | ||
| 1414 | goto err_out; | ||
| 1415 | kunmap(*pages); | ||
| 1416 | } | ||
| 1417 | total += len; | ||
| 1418 | ntfs_set_next_iovec(iov, iov_ofs, len); | ||
| 1419 | bytes -= len; | ||
| 1420 | if (!bytes) | ||
| 1421 | break; | ||
| 1422 | ofs = 0; | ||
| 1423 | } while (++pages < last_page); | ||
| 1424 | out: | ||
| 1425 | return total; | ||
| 1426 | err_out: | ||
| 1427 | BUG_ON(copied > len); | ||
| 1428 | /* Zero the rest of the target like __copy_from_user(). */ | ||
| 1429 | memset(addr + ofs + copied, 0, len - copied); | ||
| 1430 | kunmap(*pages); | ||
| 1431 | total += copied; | ||
| 1432 | ntfs_set_next_iovec(iov, iov_ofs, copied); | ||
| 1433 | while (++pages < last_page) { | ||
| 1434 | bytes -= len; | ||
| 1435 | if (!bytes) | ||
| 1436 | break; | ||
| 1437 | len = PAGE_CACHE_SIZE; | ||
| 1438 | if (len > bytes) | ||
| 1439 | len = bytes; | ||
| 1440 | zero_user(*pages, 0, len); | ||
| 1441 | } | ||
| 1442 | goto out; | ||
| 1443 | } | ||
| 1444 | |||
| 1445 | static inline void ntfs_flush_dcache_pages(struct page **pages, | 1376 | static inline void ntfs_flush_dcache_pages(struct page **pages, |
| 1446 | unsigned nr_pages) | 1377 | unsigned nr_pages) |
| 1447 | { | 1378 | { |
| @@ -1762,86 +1693,83 @@ err_out: | |||
| 1762 | return err; | 1693 | return err; |
| 1763 | } | 1694 | } |
| 1764 | 1695 | ||
| 1765 | static void ntfs_write_failed(struct address_space *mapping, loff_t to) | 1696 | /* |
| 1697 | * Copy as much as we can into the pages and return the number of bytes which | ||
| 1698 | * were successfully copied. If a fault is encountered then clear the pages | ||
| 1699 | * out to (ofs + bytes) and return the number of bytes which were copied. | ||
| 1700 | */ | ||
| 1701 | static size_t ntfs_copy_from_user_iter(struct page **pages, unsigned nr_pages, | ||
| 1702 | unsigned ofs, struct iov_iter *i, size_t bytes) | ||
| 1766 | { | 1703 | { |
| 1767 | struct inode *inode = mapping->host; | 1704 | struct page **last_page = pages + nr_pages; |
| 1705 | size_t total = 0; | ||
| 1706 | struct iov_iter data = *i; | ||
| 1707 | unsigned len, copied; | ||
| 1768 | 1708 | ||
| 1769 | if (to > inode->i_size) { | 1709 | do { |
| 1770 | truncate_pagecache(inode, inode->i_size); | 1710 | len = PAGE_CACHE_SIZE - ofs; |
| 1771 | ntfs_truncate_vfs(inode); | 1711 | if (len > bytes) |
| 1772 | } | 1712 | len = bytes; |
| 1713 | copied = iov_iter_copy_from_user_atomic(*pages, &data, ofs, | ||
| 1714 | len); | ||
| 1715 | total += copied; | ||
| 1716 | bytes -= copied; | ||
| 1717 | if (!bytes) | ||
| 1718 | break; | ||
| 1719 | iov_iter_advance(&data, copied); | ||
| 1720 | if (copied < len) | ||
| 1721 | goto err; | ||
| 1722 | ofs = 0; | ||
| 1723 | } while (++pages < last_page); | ||
| 1724 | out: | ||
| 1725 | return total; | ||
| 1726 | err: | ||
| 1727 | /* Zero the rest of the target like __copy_from_user(). */ | ||
| 1728 | len = PAGE_CACHE_SIZE - copied; | ||
| 1729 | do { | ||
| 1730 | if (len > bytes) | ||
| 1731 | len = bytes; | ||
| 1732 | zero_user(*pages, copied, len); | ||
| 1733 | bytes -= len; | ||
| 1734 | copied = 0; | ||
| 1735 | len = PAGE_CACHE_SIZE; | ||
| 1736 | } while (++pages < last_page); | ||
| 1737 | goto out; | ||
| 1773 | } | 1738 | } |
| 1774 | 1739 | ||
| 1775 | /** | 1740 | /** |
| 1776 | * ntfs_file_buffered_write - | 1741 | * ntfs_perform_write - perform buffered write to a file |
| 1777 | * | 1742 | * @file: file to write to |
| 1778 | * Locking: The vfs is holding ->i_mutex on the inode. | 1743 | * @i: iov_iter with data to write |
| 1744 | * @pos: byte offset in file at which to begin writing to | ||
| 1779 | */ | 1745 | */ |
| 1780 | static ssize_t ntfs_file_buffered_write(struct kiocb *iocb, | 1746 | static ssize_t ntfs_perform_write(struct file *file, struct iov_iter *i, |
| 1781 | const struct iovec *iov, unsigned long nr_segs, | 1747 | loff_t pos) |
| 1782 | loff_t pos, loff_t *ppos, size_t count) | ||
| 1783 | { | 1748 | { |
| 1784 | struct file *file = iocb->ki_filp; | ||
| 1785 | struct address_space *mapping = file->f_mapping; | 1749 | struct address_space *mapping = file->f_mapping; |
| 1786 | struct inode *vi = mapping->host; | 1750 | struct inode *vi = mapping->host; |
| 1787 | ntfs_inode *ni = NTFS_I(vi); | 1751 | ntfs_inode *ni = NTFS_I(vi); |
| 1788 | ntfs_volume *vol = ni->vol; | 1752 | ntfs_volume *vol = ni->vol; |
| 1789 | struct page *pages[NTFS_MAX_PAGES_PER_CLUSTER]; | 1753 | struct page *pages[NTFS_MAX_PAGES_PER_CLUSTER]; |
| 1790 | struct page *cached_page = NULL; | 1754 | struct page *cached_page = NULL; |
| 1791 | char __user *buf = NULL; | ||
| 1792 | s64 end, ll; | ||
| 1793 | VCN last_vcn; | 1755 | VCN last_vcn; |
| 1794 | LCN lcn; | 1756 | LCN lcn; |
| 1795 | unsigned long flags; | 1757 | size_t bytes; |
| 1796 | size_t bytes, iov_ofs = 0; /* Offset in the current iovec. */ | 1758 | ssize_t status, written = 0; |
| 1797 | ssize_t status, written; | ||
| 1798 | unsigned nr_pages; | 1759 | unsigned nr_pages; |
| 1799 | int err; | ||
| 1800 | 1760 | ||
| 1801 | ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, " | 1761 | ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, pos " |
| 1802 | "pos 0x%llx, count 0x%lx.", | 1762 | "0x%llx, count 0x%lx.", vi->i_ino, |
| 1803 | vi->i_ino, (unsigned)le32_to_cpu(ni->type), | 1763 | (unsigned)le32_to_cpu(ni->type), |
| 1804 | (unsigned long long)pos, (unsigned long)count); | 1764 | (unsigned long long)pos, |
| 1805 | if (unlikely(!count)) | 1765 | (unsigned long)iov_iter_count(i)); |
| 1806 | return 0; | ||
| 1807 | BUG_ON(NInoMstProtected(ni)); | ||
| 1808 | /* | ||
| 1809 | * If the attribute is not an index root and it is encrypted or | ||
| 1810 | * compressed, we cannot write to it yet. Note we need to check for | ||
| 1811 | * AT_INDEX_ALLOCATION since this is the type of both directory and | ||
| 1812 | * index inodes. | ||
| 1813 | */ | ||
| 1814 | if (ni->type != AT_INDEX_ALLOCATION) { | ||
| 1815 | /* If file is encrypted, deny access, just like NT4. */ | ||
| 1816 | if (NInoEncrypted(ni)) { | ||
| 1817 | /* | ||
| 1818 | * Reminder for later: Encrypted files are _always_ | ||
| 1819 | * non-resident so that the content can always be | ||
| 1820 | * encrypted. | ||
| 1821 | */ | ||
| 1822 | ntfs_debug("Denying write access to encrypted file."); | ||
| 1823 | return -EACCES; | ||
| 1824 | } | ||
| 1825 | if (NInoCompressed(ni)) { | ||
| 1826 | /* Only unnamed $DATA attribute can be compressed. */ | ||
| 1827 | BUG_ON(ni->type != AT_DATA); | ||
| 1828 | BUG_ON(ni->name_len); | ||
| 1829 | /* | ||
| 1830 | * Reminder for later: If resident, the data is not | ||
| 1831 | * actually compressed. Only on the switch to non- | ||
| 1832 | * resident does compression kick in. This is in | ||
| 1833 | * contrast to encrypted files (see above). | ||
| 1834 | */ | ||
| 1835 | ntfs_error(vi->i_sb, "Writing to compressed files is " | ||
| 1836 | "not implemented yet. Sorry."); | ||
| 1837 | return -EOPNOTSUPP; | ||
| 1838 | } | ||
| 1839 | } | ||
| 1840 | /* | 1766 | /* |
| 1841 | * If a previous ntfs_truncate() failed, repeat it and abort if it | 1767 | * If a previous ntfs_truncate() failed, repeat it and abort if it |
| 1842 | * fails again. | 1768 | * fails again. |
| 1843 | */ | 1769 | */ |
| 1844 | if (unlikely(NInoTruncateFailed(ni))) { | 1770 | if (unlikely(NInoTruncateFailed(ni))) { |
| 1771 | int err; | ||
| 1772 | |||
| 1845 | inode_dio_wait(vi); | 1773 | inode_dio_wait(vi); |
| 1846 | err = ntfs_truncate(vi); | 1774 | err = ntfs_truncate(vi); |
| 1847 | if (err || NInoTruncateFailed(ni)) { | 1775 | if (err || NInoTruncateFailed(ni)) { |
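ntfs_copy_from_user_iter() in the hunk above replaces the old iovec helpers: it copies page by page through the iov_iter and, on a fault, zeroes the remainder of the requested range so no stale data is left behind, returning only what was really copied. A compact userspace sketch of that copy-then-zero-the-tail behaviour, with a fault simulated by exhausting the source; everything below is illustrative:

#include <stddef.h>
#include <string.h>

#define PAGE_SZ 4096

/* copies up to len bytes; returning less than len simulates a fault */
static size_t copy_chunk(unsigned char *dst, const unsigned char **src,
                         size_t *src_left, size_t len)
{
        size_t n = len < *src_left ? len : *src_left;

        memcpy(dst, *src, n);
        *src += n;
        *src_left -= n;
        return n;
}

size_t copy_to_pages(unsigned char **pages, unsigned nr_pages, unsigned ofs,
                     const unsigned char *src, size_t src_len, size_t bytes)
{
        size_t total = 0;

        for (unsigned i = 0; i < nr_pages && bytes; i++, ofs = 0) {
                size_t len = PAGE_SZ - ofs;
                size_t copied;

                if (len > bytes)
                        len = bytes;
                copied = copy_chunk(pages[i] + ofs, &src, &src_len, len);
                if (copied < len)       /* "fault": zero what this chunk still owed */
                        memset(pages[i] + ofs + copied, 0, len - copied);
                total += copied;        /* only genuinely copied bytes are reported */
                bytes -= len;           /* the whole chunk is accounted for either way */
        }
        return total;
}

Once the source runs dry, copy_chunk() keeps returning 0, so the rest of the requested range comes out zeroed while total still reflects only the bytes that arrived.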
| @@ -1855,81 +1783,6 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb, | |||
| 1855 | return err; | 1783 | return err; |
| 1856 | } | 1784 | } |
| 1857 | } | 1785 | } |
| 1858 | /* The first byte after the write. */ | ||
| 1859 | end = pos + count; | ||
| 1860 | /* | ||
| 1861 | * If the write goes beyond the allocated size, extend the allocation | ||
| 1862 | * to cover the whole of the write, rounded up to the nearest cluster. | ||
| 1863 | */ | ||
| 1864 | read_lock_irqsave(&ni->size_lock, flags); | ||
| 1865 | ll = ni->allocated_size; | ||
| 1866 | read_unlock_irqrestore(&ni->size_lock, flags); | ||
| 1867 | if (end > ll) { | ||
| 1868 | /* Extend the allocation without changing the data size. */ | ||
| 1869 | ll = ntfs_attr_extend_allocation(ni, end, -1, pos); | ||
| 1870 | if (likely(ll >= 0)) { | ||
| 1871 | BUG_ON(pos >= ll); | ||
| 1872 | /* If the extension was partial truncate the write. */ | ||
| 1873 | if (end > ll) { | ||
| 1874 | ntfs_debug("Truncating write to inode 0x%lx, " | ||
| 1875 | "attribute type 0x%x, because " | ||
| 1876 | "the allocation was only " | ||
| 1877 | "partially extended.", | ||
| 1878 | vi->i_ino, (unsigned) | ||
| 1879 | le32_to_cpu(ni->type)); | ||
| 1880 | end = ll; | ||
| 1881 | count = ll - pos; | ||
| 1882 | } | ||
| 1883 | } else { | ||
| 1884 | err = ll; | ||
| 1885 | read_lock_irqsave(&ni->size_lock, flags); | ||
| 1886 | ll = ni->allocated_size; | ||
| 1887 | read_unlock_irqrestore(&ni->size_lock, flags); | ||
| 1888 | /* Perform a partial write if possible or fail. */ | ||
| 1889 | if (pos < ll) { | ||
| 1890 | ntfs_debug("Truncating write to inode 0x%lx, " | ||
| 1891 | "attribute type 0x%x, because " | ||
| 1892 | "extending the allocation " | ||
| 1893 | "failed (error code %i).", | ||
| 1894 | vi->i_ino, (unsigned) | ||
| 1895 | le32_to_cpu(ni->type), err); | ||
| 1896 | end = ll; | ||
| 1897 | count = ll - pos; | ||
| 1898 | } else { | ||
| 1899 | ntfs_error(vol->sb, "Cannot perform write to " | ||
| 1900 | "inode 0x%lx, attribute type " | ||
| 1901 | "0x%x, because extending the " | ||
| 1902 | "allocation failed (error " | ||
| 1903 | "code %i).", vi->i_ino, | ||
| 1904 | (unsigned) | ||
| 1905 | le32_to_cpu(ni->type), err); | ||
| 1906 | return err; | ||
| 1907 | } | ||
| 1908 | } | ||
| 1909 | } | ||
| 1910 | written = 0; | ||
| 1911 | /* | ||
| 1912 | * If the write starts beyond the initialized size, extend it up to the | ||
| 1913 | * beginning of the write and initialize all non-sparse space between | ||
| 1914 | * the old initialized size and the new one. This automatically also | ||
| 1915 | * increments the vfs inode->i_size to keep it above or equal to the | ||
| 1916 | * initialized_size. | ||
| 1917 | */ | ||
| 1918 | read_lock_irqsave(&ni->size_lock, flags); | ||
| 1919 | ll = ni->initialized_size; | ||
| 1920 | read_unlock_irqrestore(&ni->size_lock, flags); | ||
| 1921 | if (pos > ll) { | ||
| 1922 | err = ntfs_attr_extend_initialized(ni, pos); | ||
| 1923 | if (err < 0) { | ||
| 1924 | ntfs_error(vol->sb, "Cannot perform write to inode " | ||
| 1925 | "0x%lx, attribute type 0x%x, because " | ||
| 1926 | "extending the initialized size " | ||
| 1927 | "failed (error code %i).", vi->i_ino, | ||
| 1928 | (unsigned)le32_to_cpu(ni->type), err); | ||
| 1929 | status = err; | ||
| 1930 | goto err_out; | ||
| 1931 | } | ||
| 1932 | } | ||
| 1933 | /* | 1786 | /* |
| 1934 | * Determine the number of pages per cluster for non-resident | 1787 | * Determine the number of pages per cluster for non-resident |
| 1935 | * attributes. | 1788 | * attributes. |
| @@ -1937,10 +1790,7 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb, | |||
| 1937 | nr_pages = 1; | 1790 | nr_pages = 1; |
| 1938 | if (vol->cluster_size > PAGE_CACHE_SIZE && NInoNonResident(ni)) | 1791 | if (vol->cluster_size > PAGE_CACHE_SIZE && NInoNonResident(ni)) |
| 1939 | nr_pages = vol->cluster_size >> PAGE_CACHE_SHIFT; | 1792 | nr_pages = vol->cluster_size >> PAGE_CACHE_SHIFT; |
| 1940 | /* Finally, perform the actual write. */ | ||
| 1941 | last_vcn = -1; | 1793 | last_vcn = -1; |
| 1942 | if (likely(nr_segs == 1)) | ||
| 1943 | buf = iov->iov_base; | ||
| 1944 | do { | 1794 | do { |
| 1945 | VCN vcn; | 1795 | VCN vcn; |
| 1946 | pgoff_t idx, start_idx; | 1796 | pgoff_t idx, start_idx; |
| @@ -1965,10 +1815,10 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb, | |||
| 1965 | vol->cluster_size_bits, false); | 1815 | vol->cluster_size_bits, false); |
| 1966 | up_read(&ni->runlist.lock); | 1816 | up_read(&ni->runlist.lock); |
| 1967 | if (unlikely(lcn < LCN_HOLE)) { | 1817 | if (unlikely(lcn < LCN_HOLE)) { |
| 1968 | status = -EIO; | ||
| 1969 | if (lcn == LCN_ENOMEM) | 1818 | if (lcn == LCN_ENOMEM) |
| 1970 | status = -ENOMEM; | 1819 | status = -ENOMEM; |
| 1971 | else | 1820 | else { |
| 1821 | status = -EIO; | ||
| 1972 | ntfs_error(vol->sb, "Cannot " | 1822 | ntfs_error(vol->sb, "Cannot " |
| 1973 | "perform write to " | 1823 | "perform write to " |
| 1974 | "inode 0x%lx, " | 1824 | "inode 0x%lx, " |
| @@ -1977,6 +1827,7 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb, | |||
| 1977 | "is corrupt.", | 1827 | "is corrupt.", |
| 1978 | vi->i_ino, (unsigned) | 1828 | vi->i_ino, (unsigned) |
| 1979 | le32_to_cpu(ni->type)); | 1829 | le32_to_cpu(ni->type)); |
| 1830 | } | ||
| 1980 | break; | 1831 | break; |
| 1981 | } | 1832 | } |
| 1982 | if (lcn == LCN_HOLE) { | 1833 | if (lcn == LCN_HOLE) { |
| @@ -1989,8 +1840,9 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb, | |||
| 1989 | } | 1840 | } |
| 1990 | } | 1841 | } |
| 1991 | } | 1842 | } |
| 1992 | if (bytes > count) | 1843 | if (bytes > iov_iter_count(i)) |
| 1993 | bytes = count; | 1844 | bytes = iov_iter_count(i); |
| 1845 | again: | ||
| 1994 | /* | 1846 | /* |
| 1995 | * Bring in the user page(s) that we will copy from _first_. | 1847 | * Bring in the user page(s) that we will copy from _first_. |
| 1996 | * Otherwise there is a nasty deadlock on copying from the same | 1848 | * Otherwise there is a nasty deadlock on copying from the same |
| @@ -1999,10 +1851,10 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb, | |||
| 1999 | * pages being swapped out between us bringing them into memory | 1851 | * pages being swapped out between us bringing them into memory |
| 2000 | * and doing the actual copying. | 1852 | * and doing the actual copying. |
| 2001 | */ | 1853 | */ |
| 2002 | if (likely(nr_segs == 1)) | 1854 | if (unlikely(iov_iter_fault_in_multipages_readable(i, bytes))) { |
| 2003 | ntfs_fault_in_pages_readable(buf, bytes); | 1855 | status = -EFAULT; |
| 2004 | else | 1856 | break; |
| 2005 | ntfs_fault_in_pages_readable_iovec(iov, iov_ofs, bytes); | 1857 | } |
| 2006 | /* Get and lock @do_pages starting at index @start_idx. */ | 1858 | /* Get and lock @do_pages starting at index @start_idx. */ |
| 2007 | status = __ntfs_grab_cache_pages(mapping, start_idx, do_pages, | 1859 | status = __ntfs_grab_cache_pages(mapping, start_idx, do_pages, |
| 2008 | pages, &cached_page); | 1860 | pages, &cached_page); |
| @@ -2018,56 +1870,57 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb, | |||
| 2018 | status = ntfs_prepare_pages_for_non_resident_write( | 1870 | status = ntfs_prepare_pages_for_non_resident_write( |
| 2019 | pages, do_pages, pos, bytes); | 1871 | pages, do_pages, pos, bytes); |
| 2020 | if (unlikely(status)) { | 1872 | if (unlikely(status)) { |
| 2021 | loff_t i_size; | ||
| 2022 | |||
| 2023 | do { | 1873 | do { |
| 2024 | unlock_page(pages[--do_pages]); | 1874 | unlock_page(pages[--do_pages]); |
| 2025 | page_cache_release(pages[do_pages]); | 1875 | page_cache_release(pages[do_pages]); |
| 2026 | } while (do_pages); | 1876 | } while (do_pages); |
| 2027 | /* | ||
| 2028 | * The write preparation may have instantiated | ||
| 2029 | * allocated space outside i_size. Trim this | ||
| 2030 | * off again. We can ignore any errors in this | ||
| 2031 | * case as we will just be waisting a bit of | ||
| 2032 | * allocated space, which is not a disaster. | ||
| 2033 | */ | ||
| 2034 | i_size = i_size_read(vi); | ||
| 2035 | if (pos + bytes > i_size) { | ||
| 2036 | ntfs_write_failed(mapping, pos + bytes); | ||
| 2037 | } | ||
| 2038 | break; | 1877 | break; |
| 2039 | } | 1878 | } |
| 2040 | } | 1879 | } |
| 2041 | u = (pos >> PAGE_CACHE_SHIFT) - pages[0]->index; | 1880 | u = (pos >> PAGE_CACHE_SHIFT) - pages[0]->index; |
| 2042 | if (likely(nr_segs == 1)) { | 1881 | copied = ntfs_copy_from_user_iter(pages + u, do_pages - u, ofs, |
| 2043 | copied = ntfs_copy_from_user(pages + u, do_pages - u, | 1882 | i, bytes); |
| 2044 | ofs, buf, bytes); | ||
| 2045 | buf += copied; | ||
| 2046 | } else | ||
| 2047 | copied = ntfs_copy_from_user_iovec(pages + u, | ||
| 2048 | do_pages - u, ofs, &iov, &iov_ofs, | ||
| 2049 | bytes); | ||
| 2050 | ntfs_flush_dcache_pages(pages + u, do_pages - u); | 1883 | ntfs_flush_dcache_pages(pages + u, do_pages - u); |
| 2051 | status = ntfs_commit_pages_after_write(pages, do_pages, pos, | 1884 | status = 0; |
| 2052 | bytes); | 1885 | if (likely(copied == bytes)) { |
| 2053 | if (likely(!status)) { | 1886 | status = ntfs_commit_pages_after_write(pages, do_pages, |
| 2054 | written += copied; | 1887 | pos, bytes); |
| 2055 | count -= copied; | 1888 | if (!status) |
| 2056 | pos += copied; | 1889 | status = bytes; |
| 2057 | if (unlikely(copied != bytes)) | ||
| 2058 | status = -EFAULT; | ||
| 2059 | } | 1890 | } |
| 2060 | do { | 1891 | do { |
| 2061 | unlock_page(pages[--do_pages]); | 1892 | unlock_page(pages[--do_pages]); |
| 2062 | page_cache_release(pages[do_pages]); | 1893 | page_cache_release(pages[do_pages]); |
| 2063 | } while (do_pages); | 1894 | } while (do_pages); |
| 2064 | if (unlikely(status)) | 1895 | if (unlikely(status < 0)) |
| 2065 | break; | 1896 | break; |
| 2066 | balance_dirty_pages_ratelimited(mapping); | 1897 | copied = status; |
| 2067 | cond_resched(); | 1898 | cond_resched(); |
| 2068 | } while (count); | 1899 | if (unlikely(!copied)) { |
| 2069 | err_out: | 1900 | size_t sc; |
| 2070 | *ppos = pos; | 1901 | |
| 1902 | /* | ||
| 1903 | * We failed to copy anything. Fall back to single | ||
| 1904 | * segment length write. | ||
| 1905 | * | ||
| 1906 | * This is needed to avoid possible livelock in the | ||
| 1907 | * case that all segments in the iov cannot be copied | ||
| 1908 | * at once without a pagefault. | ||
| 1909 | */ | ||
| 1910 | sc = iov_iter_single_seg_count(i); | ||
| 1911 | if (bytes > sc) | ||
| 1912 | bytes = sc; | ||
| 1913 | goto again; | ||
| 1914 | } | ||
| 1915 | iov_iter_advance(i, copied); | ||
| 1916 | pos += copied; | ||
| 1917 | written += copied; | ||
| 1918 | balance_dirty_pages_ratelimited(mapping); | ||
| 1919 | if (fatal_signal_pending(current)) { | ||
| 1920 | status = -EINTR; | ||
| 1921 | break; | ||
| 1922 | } | ||
| 1923 | } while (iov_iter_count(i)); | ||
| 2071 | if (cached_page) | 1924 | if (cached_page) |
| 2072 | page_cache_release(cached_page); | 1925 | page_cache_release(cached_page); |
| 2073 | ntfs_debug("Done. Returning %s (written 0x%lx, status %li).", | 1926 | ntfs_debug("Done. Returning %s (written 0x%lx, status %li).", |
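The rewritten write loop above adds two safeguards: when nothing could be copied it retries with a single-segment-sized chunk to avoid a livelock, and it bails out when a fatal signal is pending. A sketch of that retry-smaller-then-give-up shape; the copy and signal helpers are stubs, not kernel calls:

#include <errno.h>
#include <stdbool.h>
#include <stddef.h>

static size_t budget = 200000;          /* how much the fake source can supply */

static size_t try_copy(size_t want)     /* stub for the per-chunk page copy */
{
        size_t n = want < budget ? want : budget;

        budget -= n;
        return n;                       /* returns 0 once the source is exhausted */
}

static bool interrupted(void)           /* stub for fatal_signal_pending() */
{
        return false;
}

long write_loop(size_t remaining)
{
        long written = 0;

        while (remaining) {
                size_t chunk = remaining > 65536 ? 65536 : remaining;
                size_t copied;
again:
                copied = try_copy(chunk);
                if (!copied) {
                        if (chunk > 512) {
                                chunk = 512;    /* retry a small chunk: avoids a livelock */
                                goto again;
                        }
                        return written ? written : -EFAULT;
                }
                written += (long)copied;
                remaining -= copied;
                if (interrupted())
                        return written ? written : -EINTR;
        }
        return written;
}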
| @@ -2077,59 +1930,56 @@ err_out: | |||
| 2077 | } | 1930 | } |
| 2078 | 1931 | ||
| 2079 | /** | 1932 | /** |
| 2080 | * ntfs_file_aio_write_nolock - | 1933 | * ntfs_file_write_iter_nolock - write data to a file |
| 1934 | * @iocb: IO state structure (file, offset, etc.) | ||
| 1935 | * @from: iov_iter with data to write | ||
| 1936 | * | ||
| 1937 | * Basically the same as __generic_file_write_iter() except that it ends | ||
| 1938 | * up calling ntfs_perform_write() instead of generic_perform_write() and that | ||
| 1939 | * O_DIRECT is not implemented. | ||
| 2081 | */ | 1940 | */ |
| 2082 | static ssize_t ntfs_file_aio_write_nolock(struct kiocb *iocb, | 1941 | static ssize_t ntfs_file_write_iter_nolock(struct kiocb *iocb, |
| 2083 | const struct iovec *iov, unsigned long nr_segs, loff_t *ppos) | 1942 | struct iov_iter *from) |
| 2084 | { | 1943 | { |
| 2085 | struct file *file = iocb->ki_filp; | 1944 | struct file *file = iocb->ki_filp; |
| 2086 | struct address_space *mapping = file->f_mapping; | 1945 | loff_t pos = iocb->ki_pos; |
| 2087 | struct inode *inode = mapping->host; | 1946 | ssize_t written = 0; |
| 2088 | loff_t pos; | 1947 | ssize_t err; |
| 2089 | size_t count; /* after file limit checks */ | 1948 | size_t count = iov_iter_count(from); |
| 2090 | ssize_t written, err; | ||
| 2091 | 1949 | ||
| 2092 | count = iov_length(iov, nr_segs); | 1950 | err = ntfs_prepare_file_for_write(file, &pos, &count); |
| 2093 | pos = *ppos; | 1951 | if (count && !err) { |
| 2094 | /* We can write back this queue in page reclaim. */ | 1952 | iov_iter_truncate(from, count); |
| 2095 | current->backing_dev_info = inode_to_bdi(inode); | 1953 | written = ntfs_perform_write(file, from, pos); |
| 2096 | written = 0; | 1954 | if (likely(written >= 0)) |
| 2097 | err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); | 1955 | iocb->ki_pos = pos + written; |
| 2098 | if (err) | 1956 | } |
| 2099 | goto out; | ||
| 2100 | if (!count) | ||
| 2101 | goto out; | ||
| 2102 | err = file_remove_suid(file); | ||
| 2103 | if (err) | ||
| 2104 | goto out; | ||
| 2105 | err = file_update_time(file); | ||
| 2106 | if (err) | ||
| 2107 | goto out; | ||
| 2108 | written = ntfs_file_buffered_write(iocb, iov, nr_segs, pos, ppos, | ||
| 2109 | count); | ||
| 2110 | out: | ||
| 2111 | current->backing_dev_info = NULL; | 1957 | current->backing_dev_info = NULL; |
| 2112 | return written ? written : err; | 1958 | return written ? written : err; |
| 2113 | } | 1959 | } |
| 2114 | 1960 | ||
| 2115 | /** | 1961 | /** |
| 2116 | * ntfs_file_aio_write - | 1962 | * ntfs_file_write_iter - simple wrapper for ntfs_file_write_iter_nolock() |
| 1963 | * @iocb: IO state structure | ||
| 1964 | * @from: iov_iter with data to write | ||
| 1965 | * | ||
| 1966 | * Basically the same as generic_file_write_iter() except that it ends up | ||
| 1967 | * calling ntfs_file_write_iter_nolock() instead of | ||
| 1968 | * __generic_file_write_iter(). | ||
| 2117 | */ | 1969 | */ |
| 2118 | static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, | 1970 | static ssize_t ntfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) |
| 2119 | unsigned long nr_segs, loff_t pos) | ||
| 2120 | { | 1971 | { |
| 2121 | struct file *file = iocb->ki_filp; | 1972 | struct file *file = iocb->ki_filp; |
| 2122 | struct address_space *mapping = file->f_mapping; | 1973 | struct inode *vi = file_inode(file); |
| 2123 | struct inode *inode = mapping->host; | ||
| 2124 | ssize_t ret; | 1974 | ssize_t ret; |
| 2125 | 1975 | ||
| 2126 | BUG_ON(iocb->ki_pos != pos); | 1976 | mutex_lock(&vi->i_mutex); |
| 2127 | 1977 | ret = ntfs_file_write_iter_nolock(iocb, from); | |
| 2128 | mutex_lock(&inode->i_mutex); | 1978 | mutex_unlock(&vi->i_mutex); |
| 2129 | ret = ntfs_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos); | ||
| 2130 | mutex_unlock(&inode->i_mutex); | ||
| 2131 | if (ret > 0) { | 1979 | if (ret > 0) { |
| 2132 | int err = generic_write_sync(file, iocb->ki_pos - ret, ret); | 1980 | ssize_t err; |
| 1981 | |||
| 1982 | err = generic_write_sync(file, iocb->ki_pos - ret, ret); | ||
| 2133 | if (err < 0) | 1983 | if (err < 0) |
| 2134 | ret = err; | 1984 | ret = err; |
| 2135 | } | 1985 | } |
| @@ -2197,37 +2047,17 @@ static int ntfs_file_fsync(struct file *filp, loff_t start, loff_t end, | |||
| 2197 | #endif /* NTFS_RW */ | 2047 | #endif /* NTFS_RW */ |
| 2198 | 2048 | ||
| 2199 | const struct file_operations ntfs_file_ops = { | 2049 | const struct file_operations ntfs_file_ops = { |
| 2200 | .llseek = generic_file_llseek, /* Seek inside file. */ | 2050 | .llseek = generic_file_llseek, |
| 2201 | .read = new_sync_read, /* Read from file. */ | 2051 | .read = new_sync_read, |
| 2202 | .read_iter = generic_file_read_iter, /* Async read from file. */ | 2052 | .read_iter = generic_file_read_iter, |
| 2203 | #ifdef NTFS_RW | 2053 | #ifdef NTFS_RW |
| 2204 | .write = do_sync_write, /* Write to file. */ | 2054 | .write = new_sync_write, |
| 2205 | .aio_write = ntfs_file_aio_write, /* Async write to file. */ | 2055 | .write_iter = ntfs_file_write_iter, |
| 2206 | /*.release = ,*/ /* Last file is closed. See | 2056 | .fsync = ntfs_file_fsync, |
| 2207 | fs/ext2/file.c:: | ||
| 2208 | ext2_release_file() for | ||
| 2209 | how to use this to discard | ||
| 2210 | preallocated space for | ||
| 2211 | write opened files. */ | ||
| 2212 | .fsync = ntfs_file_fsync, /* Sync a file to disk. */ | ||
| 2213 | /*.aio_fsync = ,*/ /* Sync all outstanding async | ||
| 2214 | i/o operations on a | ||
| 2215 | kiocb. */ | ||
| 2216 | #endif /* NTFS_RW */ | 2057 | #endif /* NTFS_RW */ |
| 2217 | /*.ioctl = ,*/ /* Perform function on the | 2058 | .mmap = generic_file_mmap, |
| 2218 | mounted filesystem. */ | 2059 | .open = ntfs_file_open, |
| 2219 | .mmap = generic_file_mmap, /* Mmap file. */ | 2060 | .splice_read = generic_file_splice_read, |
| 2220 | .open = ntfs_file_open, /* Open file. */ | ||
| 2221 | .splice_read = generic_file_splice_read /* Zero-copy data send with | ||
| 2222 | the data source being on | ||
| 2223 | the ntfs partition. We do | ||
| 2224 | not need to care about the | ||
| 2225 | data destination. */ | ||
| 2226 | /*.sendpage = ,*/ /* Zero-copy data send with | ||
| 2227 | the data destination being | ||
| 2228 | on the ntfs partition. We | ||
| 2229 | do not need to care about | ||
| 2230 | the data source. */ | ||
| 2231 | }; | 2061 | }; |
| 2232 | 2062 | ||
| 2233 | const struct inode_operations ntfs_file_inode_ops = { | 2063 | const struct inode_operations ntfs_file_inode_ops = { |
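The fs/ntfs/file.c hunks above are the standard ->aio_write() to ->write_iter() conversion: the (iov, nr_segs, pos) triple becomes a struct iov_iter, new_sync_read()/new_sync_write() provide the plain read(2)/write(2) entry points, and generic_write_sync() still runs after a successful write. A minimal sketch of that shape, with foo_* names standing in for the filesystem-specific pieces (foo_file_write_iter_nolock() is hypothetical, playing the role of ntfs_file_write_iter_nolock()):

/* sketch of the ->write_iter() wrapper pattern used above */
static ssize_t foo_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	ssize_t ret;

	mutex_lock(&inode->i_mutex);		/* i_mutex era, pre inode_lock() */
	ret = foo_file_write_iter_nolock(iocb, from);	/* hypothetical unlocked writer */
	mutex_unlock(&inode->i_mutex);

	if (ret > 0) {
		ssize_t err = generic_write_sync(iocb->ki_filp,
						 iocb->ki_pos - ret, ret);
		if (err < 0)
			ret = err;
	}
	return ret;
}

static const struct file_operations foo_file_ops = {
	.read		= new_sync_read,	/* read(2) routed through ->read_iter */
	.read_iter	= generic_file_read_iter,
	.write		= new_sync_write,	/* write(2) routed through ->write_iter */
	.write_iter	= foo_file_write_iter,
};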
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index 898b9949d363..1d0c21df0d80 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c | |||
| @@ -28,7 +28,6 @@ | |||
| 28 | #include <linux/quotaops.h> | 28 | #include <linux/quotaops.h> |
| 29 | #include <linux/slab.h> | 29 | #include <linux/slab.h> |
| 30 | #include <linux/log2.h> | 30 | #include <linux/log2.h> |
| 31 | #include <linux/aio.h> | ||
| 32 | 31 | ||
| 33 | #include "aops.h" | 32 | #include "aops.h" |
| 34 | #include "attrib.h" | 33 | #include "attrib.h" |
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 044158bd22be..2d7f76e52c37 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c | |||
| @@ -3370,7 +3370,7 @@ static int ocfs2_merge_rec_right(struct ocfs2_path *left_path, | |||
| 3370 | ret = ocfs2_get_right_path(et, left_path, &right_path); | 3370 | ret = ocfs2_get_right_path(et, left_path, &right_path); |
| 3371 | if (ret) { | 3371 | if (ret) { |
| 3372 | mlog_errno(ret); | 3372 | mlog_errno(ret); |
| 3373 | goto out; | 3373 | return ret; |
| 3374 | } | 3374 | } |
| 3375 | 3375 | ||
| 3376 | right_el = path_leaf_el(right_path); | 3376 | right_el = path_leaf_el(right_path); |
| @@ -3453,8 +3453,7 @@ static int ocfs2_merge_rec_right(struct ocfs2_path *left_path, | |||
| 3453 | subtree_index); | 3453 | subtree_index); |
| 3454 | } | 3454 | } |
| 3455 | out: | 3455 | out: |
| 3456 | if (right_path) | 3456 | ocfs2_free_path(right_path); |
| 3457 | ocfs2_free_path(right_path); | ||
| 3458 | return ret; | 3457 | return ret; |
| 3459 | } | 3458 | } |
| 3460 | 3459 | ||
| @@ -3536,7 +3535,7 @@ static int ocfs2_merge_rec_left(struct ocfs2_path *right_path, | |||
| 3536 | ret = ocfs2_get_left_path(et, right_path, &left_path); | 3535 | ret = ocfs2_get_left_path(et, right_path, &left_path); |
| 3537 | if (ret) { | 3536 | if (ret) { |
| 3538 | mlog_errno(ret); | 3537 | mlog_errno(ret); |
| 3539 | goto out; | 3538 | return ret; |
| 3540 | } | 3539 | } |
| 3541 | 3540 | ||
| 3542 | left_el = path_leaf_el(left_path); | 3541 | left_el = path_leaf_el(left_path); |
| @@ -3647,8 +3646,7 @@ static int ocfs2_merge_rec_left(struct ocfs2_path *right_path, | |||
| 3647 | right_path, subtree_index); | 3646 | right_path, subtree_index); |
| 3648 | } | 3647 | } |
| 3649 | out: | 3648 | out: |
| 3650 | if (left_path) | 3649 | ocfs2_free_path(left_path); |
| 3651 | ocfs2_free_path(left_path); | ||
| 3652 | return ret; | 3650 | return ret; |
| 3653 | } | 3651 | } |
| 3654 | 3652 | ||
| @@ -4334,17 +4332,17 @@ ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et, | |||
| 4334 | } else if (path->p_tree_depth > 0) { | 4332 | } else if (path->p_tree_depth > 0) { |
| 4335 | status = ocfs2_find_cpos_for_left_leaf(sb, path, &left_cpos); | 4333 | status = ocfs2_find_cpos_for_left_leaf(sb, path, &left_cpos); |
| 4336 | if (status) | 4334 | if (status) |
| 4337 | goto out; | 4335 | goto exit; |
| 4338 | 4336 | ||
| 4339 | if (left_cpos != 0) { | 4337 | if (left_cpos != 0) { |
| 4340 | left_path = ocfs2_new_path_from_path(path); | 4338 | left_path = ocfs2_new_path_from_path(path); |
| 4341 | if (!left_path) | 4339 | if (!left_path) |
| 4342 | goto out; | 4340 | goto exit; |
| 4343 | 4341 | ||
| 4344 | status = ocfs2_find_path(et->et_ci, left_path, | 4342 | status = ocfs2_find_path(et->et_ci, left_path, |
| 4345 | left_cpos); | 4343 | left_cpos); |
| 4346 | if (status) | 4344 | if (status) |
| 4347 | goto out; | 4345 | goto free_left_path; |
| 4348 | 4346 | ||
| 4349 | new_el = path_leaf_el(left_path); | 4347 | new_el = path_leaf_el(left_path); |
| 4350 | 4348 | ||
| @@ -4361,7 +4359,7 @@ ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et, | |||
| 4361 | le16_to_cpu(new_el->l_next_free_rec), | 4359 | le16_to_cpu(new_el->l_next_free_rec), |
| 4362 | le16_to_cpu(new_el->l_count)); | 4360 | le16_to_cpu(new_el->l_count)); |
| 4363 | status = -EINVAL; | 4361 | status = -EINVAL; |
| 4364 | goto out; | 4362 | goto free_left_path; |
| 4365 | } | 4363 | } |
| 4366 | rec = &new_el->l_recs[ | 4364 | rec = &new_el->l_recs[ |
| 4367 | le16_to_cpu(new_el->l_next_free_rec) - 1]; | 4365 | le16_to_cpu(new_el->l_next_free_rec) - 1]; |
| @@ -4388,18 +4386,18 @@ ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et, | |||
| 4388 | path->p_tree_depth > 0) { | 4386 | path->p_tree_depth > 0) { |
| 4389 | status = ocfs2_find_cpos_for_right_leaf(sb, path, &right_cpos); | 4387 | status = ocfs2_find_cpos_for_right_leaf(sb, path, &right_cpos); |
| 4390 | if (status) | 4388 | if (status) |
| 4391 | goto out; | 4389 | goto free_left_path; |
| 4392 | 4390 | ||
| 4393 | if (right_cpos == 0) | 4391 | if (right_cpos == 0) |
| 4394 | goto out; | 4392 | goto free_left_path; |
| 4395 | 4393 | ||
| 4396 | right_path = ocfs2_new_path_from_path(path); | 4394 | right_path = ocfs2_new_path_from_path(path); |
| 4397 | if (!right_path) | 4395 | if (!right_path) |
| 4398 | goto out; | 4396 | goto free_left_path; |
| 4399 | 4397 | ||
| 4400 | status = ocfs2_find_path(et->et_ci, right_path, right_cpos); | 4398 | status = ocfs2_find_path(et->et_ci, right_path, right_cpos); |
| 4401 | if (status) | 4399 | if (status) |
| 4402 | goto out; | 4400 | goto free_right_path; |
| 4403 | 4401 | ||
| 4404 | new_el = path_leaf_el(right_path); | 4402 | new_el = path_leaf_el(right_path); |
| 4405 | rec = &new_el->l_recs[0]; | 4403 | rec = &new_el->l_recs[0]; |
| @@ -4413,7 +4411,7 @@ ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et, | |||
| 4413 | (unsigned long long)le64_to_cpu(eb->h_blkno), | 4411 | (unsigned long long)le64_to_cpu(eb->h_blkno), |
| 4414 | le16_to_cpu(new_el->l_next_free_rec)); | 4412 | le16_to_cpu(new_el->l_next_free_rec)); |
| 4415 | status = -EINVAL; | 4413 | status = -EINVAL; |
| 4416 | goto out; | 4414 | goto free_right_path; |
| 4417 | } | 4415 | } |
| 4418 | rec = &new_el->l_recs[1]; | 4416 | rec = &new_el->l_recs[1]; |
| 4419 | } | 4417 | } |
| @@ -4430,12 +4428,11 @@ ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et, | |||
| 4430 | ret = contig_type; | 4428 | ret = contig_type; |
| 4431 | } | 4429 | } |
| 4432 | 4430 | ||
| 4433 | out: | 4431 | free_right_path: |
| 4434 | if (left_path) | 4432 | ocfs2_free_path(right_path); |
| 4435 | ocfs2_free_path(left_path); | 4433 | free_left_path: |
| 4436 | if (right_path) | 4434 | ocfs2_free_path(left_path); |
| 4437 | ocfs2_free_path(right_path); | 4435 | exit: |
| 4438 | |||
| 4439 | return ret; | 4436 | return ret; |
| 4440 | } | 4437 | } |
| 4441 | 4438 | ||
| @@ -6858,13 +6855,13 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, | |||
| 6858 | if (pages == NULL) { | 6855 | if (pages == NULL) { |
| 6859 | ret = -ENOMEM; | 6856 | ret = -ENOMEM; |
| 6860 | mlog_errno(ret); | 6857 | mlog_errno(ret); |
| 6861 | goto out; | 6858 | return ret; |
| 6862 | } | 6859 | } |
| 6863 | 6860 | ||
| 6864 | ret = ocfs2_reserve_clusters(osb, 1, &data_ac); | 6861 | ret = ocfs2_reserve_clusters(osb, 1, &data_ac); |
| 6865 | if (ret) { | 6862 | if (ret) { |
| 6866 | mlog_errno(ret); | 6863 | mlog_errno(ret); |
| 6867 | goto out; | 6864 | goto free_pages; |
| 6868 | } | 6865 | } |
| 6869 | } | 6866 | } |
| 6870 | 6867 | ||
| @@ -6996,9 +6993,8 @@ out_commit: | |||
| 6996 | out: | 6993 | out: |
| 6997 | if (data_ac) | 6994 | if (data_ac) |
| 6998 | ocfs2_free_alloc_context(data_ac); | 6995 | ocfs2_free_alloc_context(data_ac); |
| 6999 | if (pages) | 6996 | free_pages: |
| 7000 | kfree(pages); | 6997 | kfree(pages); |
| 7001 | |||
| 7002 | return ret; | 6998 | return ret; |
| 7003 | } | 6999 | } |
| 7004 | 7000 | ||
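The alloc.c changes above replace one catch-all out: label with a label per allocation (free_right_path, free_left_path, exit), so each failure point unwinds exactly what has been set up, and the `if (ptr)` guards disappear because ocfs2_free_path() already ignores NULL. A generic sketch of the layered-label pattern; all types and helpers here are hypothetical:

int build_two_paths(struct walk_ctx *c)
{
	struct path_desc *left, *right;
	int ret;

	left = alloc_left_path(c);
	if (!left)
		return -ENOMEM;			/* nothing to unwind yet */

	right = alloc_right_path(c);
	if (!right) {
		ret = -ENOMEM;
		goto free_left;
	}

	ret = merge_paths(left, right);
	if (ret)
		goto free_right;

	return 0;

free_right:
	free_path(right);			/* like ocfs2_free_path(): NULL-safe */
free_left:
	free_path(left);
	return ret;
}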
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 44db1808cdb5..8d2bc840c288 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c | |||
| @@ -29,6 +29,7 @@ | |||
| 29 | #include <linux/mpage.h> | 29 | #include <linux/mpage.h> |
| 30 | #include <linux/quotaops.h> | 30 | #include <linux/quotaops.h> |
| 31 | #include <linux/blkdev.h> | 31 | #include <linux/blkdev.h> |
| 32 | #include <linux/uio.h> | ||
| 32 | 33 | ||
| 33 | #include <cluster/masklog.h> | 34 | #include <cluster/masklog.h> |
| 34 | 35 | ||
| @@ -663,6 +664,117 @@ static int ocfs2_is_overwrite(struct ocfs2_super *osb, | |||
| 663 | return 0; | 664 | return 0; |
| 664 | } | 665 | } |
| 665 | 666 | ||
| 667 | static int ocfs2_direct_IO_zero_extend(struct ocfs2_super *osb, | ||
| 668 | struct inode *inode, loff_t offset, | ||
| 669 | u64 zero_len, int cluster_align) | ||
| 670 | { | ||
| 671 | u32 p_cpos = 0; | ||
| 672 | u32 v_cpos = ocfs2_bytes_to_clusters(osb->sb, i_size_read(inode)); | ||
| 673 | unsigned int num_clusters = 0; | ||
| 674 | unsigned int ext_flags = 0; | ||
| 675 | int ret = 0; | ||
| 676 | |||
| 677 | if (offset <= i_size_read(inode) || cluster_align) | ||
| 678 | return 0; | ||
| 679 | |||
| 680 | ret = ocfs2_get_clusters(inode, v_cpos, &p_cpos, &num_clusters, | ||
| 681 | &ext_flags); | ||
| 682 | if (ret < 0) { | ||
| 683 | mlog_errno(ret); | ||
| 684 | return ret; | ||
| 685 | } | ||
| 686 | |||
| 687 | if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN)) { | ||
| 688 | u64 s = i_size_read(inode); | ||
| 689 | sector_t sector = (p_cpos << (osb->s_clustersize_bits - 9)) + | ||
| 690 | (do_div(s, osb->s_clustersize) >> 9); | ||
| 691 | |||
| 692 | ret = blkdev_issue_zeroout(osb->sb->s_bdev, sector, | ||
| 693 | zero_len >> 9, GFP_NOFS, false); | ||
| 694 | if (ret < 0) | ||
| 695 | mlog_errno(ret); | ||
| 696 | } | ||
| 697 | |||
| 698 | return ret; | ||
| 699 | } | ||
| 700 | |||
| 701 | static int ocfs2_direct_IO_extend_no_holes(struct ocfs2_super *osb, | ||
| 702 | struct inode *inode, loff_t offset) | ||
| 703 | { | ||
| 704 | u64 zero_start, zero_len, total_zero_len; | ||
| 705 | u32 p_cpos = 0, clusters_to_add; | ||
| 706 | u32 v_cpos = ocfs2_bytes_to_clusters(osb->sb, i_size_read(inode)); | ||
| 707 | unsigned int num_clusters = 0; | ||
| 708 | unsigned int ext_flags = 0; | ||
| 709 | u32 size_div, offset_div; | ||
| 710 | int ret = 0; | ||
| 711 | |||
| 712 | { | ||
| 713 | u64 o = offset; | ||
| 714 | u64 s = i_size_read(inode); | ||
| 715 | |||
| 716 | offset_div = do_div(o, osb->s_clustersize); | ||
| 717 | size_div = do_div(s, osb->s_clustersize); | ||
| 718 | } | ||
| 719 | |||
| 720 | if (offset <= i_size_read(inode)) | ||
| 721 | return 0; | ||
| 722 | |||
| 723 | clusters_to_add = ocfs2_bytes_to_clusters(inode->i_sb, offset) - | ||
| 724 | ocfs2_bytes_to_clusters(inode->i_sb, i_size_read(inode)); | ||
| 725 | total_zero_len = offset - i_size_read(inode); | ||
| 726 | if (clusters_to_add) | ||
| 727 | total_zero_len -= offset_div; | ||
| 728 | |||
| 729 | /* Allocate clusters to fill out holes, and this is only needed | ||
| 730 | * when we add more than one cluster. Otherwise the cluster will | ||
| 731 | * be allocated during direct IO */ | ||
| 732 | if (clusters_to_add > 1) { | ||
| 733 | ret = ocfs2_extend_allocation(inode, | ||
| 734 | OCFS2_I(inode)->ip_clusters, | ||
| 735 | clusters_to_add - 1, 0); | ||
| 736 | if (ret) { | ||
| 737 | mlog_errno(ret); | ||
| 738 | goto out; | ||
| 739 | } | ||
| 740 | } | ||
| 741 | |||
| 742 | while (total_zero_len) { | ||
| 743 | ret = ocfs2_get_clusters(inode, v_cpos, &p_cpos, &num_clusters, | ||
| 744 | &ext_flags); | ||
| 745 | if (ret < 0) { | ||
| 746 | mlog_errno(ret); | ||
| 747 | goto out; | ||
| 748 | } | ||
| 749 | |||
| 750 | zero_start = ocfs2_clusters_to_bytes(osb->sb, p_cpos) + | ||
| 751 | size_div; | ||
| 752 | zero_len = ocfs2_clusters_to_bytes(osb->sb, num_clusters) - | ||
| 753 | size_div; | ||
| 754 | zero_len = min(total_zero_len, zero_len); | ||
| 755 | |||
| 756 | if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN)) { | ||
| 757 | ret = blkdev_issue_zeroout(osb->sb->s_bdev, | ||
| 758 | zero_start >> 9, zero_len >> 9, | ||
| 759 | GFP_NOFS, false); | ||
| 760 | if (ret < 0) { | ||
| 761 | mlog_errno(ret); | ||
| 762 | goto out; | ||
| 763 | } | ||
| 764 | } | ||
| 765 | |||
| 766 | total_zero_len -= zero_len; | ||
| 767 | v_cpos += ocfs2_bytes_to_clusters(osb->sb, zero_len + size_div); | ||
| 768 | |||
| 769 | /* Only the first iteration can be cluster-unaligned, | ||
| 770 | * so set size_div to 0 for the rest. */ | ||
| 771 | size_div = 0; | ||
| 772 | } | ||
| 773 | |||
| 774 | out: | ||
| 775 | return ret; | ||
| 776 | } | ||
| 777 | |||
| 666 | static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb, | 778 | static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb, |
| 667 | struct iov_iter *iter, | 779 | struct iov_iter *iter, |
| 668 | loff_t offset) | 780 | loff_t offset) |
| @@ -677,8 +789,8 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb, | |||
| 677 | struct buffer_head *di_bh = NULL; | 789 | struct buffer_head *di_bh = NULL; |
| 678 | size_t count = iter->count; | 790 | size_t count = iter->count; |
| 679 | journal_t *journal = osb->journal->j_journal; | 791 | journal_t *journal = osb->journal->j_journal; |
| 680 | u32 zero_len; | 792 | u64 zero_len_head, zero_len_tail; |
| 681 | int cluster_align; | 793 | int cluster_align_head, cluster_align_tail; |
| 682 | loff_t final_size = offset + count; | 794 | loff_t final_size = offset + count; |
| 683 | int append_write = offset >= i_size_read(inode) ? 1 : 0; | 795 | int append_write = offset >= i_size_read(inode) ? 1 : 0; |
| 684 | unsigned int num_clusters = 0; | 796 | unsigned int num_clusters = 0; |
| @@ -686,9 +798,16 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb, | |||
| 686 | 798 | ||
| 687 | { | 799 | { |
| 688 | u64 o = offset; | 800 | u64 o = offset; |
| 801 | u64 s = i_size_read(inode); | ||
| 802 | |||
| 803 | zero_len_head = do_div(o, 1 << osb->s_clustersize_bits); | ||
| 804 | cluster_align_head = !zero_len_head; | ||
| 689 | 805 | ||
| 690 | zero_len = do_div(o, 1 << osb->s_clustersize_bits); | 806 | zero_len_tail = osb->s_clustersize - |
| 691 | cluster_align = !zero_len; | 807 | do_div(s, osb->s_clustersize); |
| 808 | if ((offset - i_size_read(inode)) < zero_len_tail) | ||
| 809 | zero_len_tail = offset - i_size_read(inode); | ||
| 810 | cluster_align_tail = !zero_len_tail; | ||
| 692 | } | 811 | } |
| 693 | 812 | ||
| 694 | /* | 813 | /* |
| @@ -706,21 +825,23 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb, | |||
| 706 | } | 825 | } |
| 707 | 826 | ||
| 708 | if (append_write) { | 827 | if (append_write) { |
| 709 | ret = ocfs2_inode_lock(inode, &di_bh, 1); | 828 | ret = ocfs2_inode_lock(inode, NULL, 1); |
| 710 | if (ret < 0) { | 829 | if (ret < 0) { |
| 711 | mlog_errno(ret); | 830 | mlog_errno(ret); |
| 712 | goto clean_orphan; | 831 | goto clean_orphan; |
| 713 | } | 832 | } |
| 714 | 833 | ||
| 834 | /* zero out the tail of the previously allocated cluster | ||
| 835 | * that has not been zeroed yet */ | ||
| 715 | if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) | 836 | if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) |
| 716 | ret = ocfs2_zero_extend(inode, di_bh, offset); | 837 | ret = ocfs2_direct_IO_zero_extend(osb, inode, offset, |
| 838 | zero_len_tail, cluster_align_tail); | ||
| 717 | else | 839 | else |
| 718 | ret = ocfs2_extend_no_holes(inode, di_bh, offset, | 840 | ret = ocfs2_direct_IO_extend_no_holes(osb, inode, |
| 719 | offset); | 841 | offset); |
| 720 | if (ret < 0) { | 842 | if (ret < 0) { |
| 721 | mlog_errno(ret); | 843 | mlog_errno(ret); |
| 722 | ocfs2_inode_unlock(inode, 1); | 844 | ocfs2_inode_unlock(inode, 1); |
| 723 | brelse(di_bh); | ||
| 724 | goto clean_orphan; | 845 | goto clean_orphan; |
| 725 | } | 846 | } |
| 726 | 847 | ||
| @@ -728,13 +849,10 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb, | |||
| 728 | if (is_overwrite < 0) { | 849 | if (is_overwrite < 0) { |
| 729 | mlog_errno(is_overwrite); | 850 | mlog_errno(is_overwrite); |
| 730 | ocfs2_inode_unlock(inode, 1); | 851 | ocfs2_inode_unlock(inode, 1); |
| 731 | brelse(di_bh); | ||
| 732 | goto clean_orphan; | 852 | goto clean_orphan; |
| 733 | } | 853 | } |
| 734 | 854 | ||
| 735 | ocfs2_inode_unlock(inode, 1); | 855 | ocfs2_inode_unlock(inode, 1); |
| 736 | brelse(di_bh); | ||
| 737 | di_bh = NULL; | ||
| 738 | } | 856 | } |
| 739 | 857 | ||
| 740 | written = __blockdev_direct_IO(WRITE, iocb, inode, inode->i_sb->s_bdev, | 858 | written = __blockdev_direct_IO(WRITE, iocb, inode, inode->i_sb->s_bdev, |
| @@ -771,15 +889,23 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb, | |||
| 771 | if (ret < 0) | 889 | if (ret < 0) |
| 772 | mlog_errno(ret); | 890 | mlog_errno(ret); |
| 773 | } | 891 | } |
| 774 | } else if (written < 0 && append_write && !is_overwrite && | 892 | } else if (written > 0 && append_write && !is_overwrite && |
| 775 | !cluster_align) { | 893 | !cluster_align_head) { |
| 894 | /* zeroing out the allocated cluster head */ | ||
| 776 | u32 p_cpos = 0; | 895 | u32 p_cpos = 0; |
| 777 | u32 v_cpos = ocfs2_bytes_to_clusters(osb->sb, offset); | 896 | u32 v_cpos = ocfs2_bytes_to_clusters(osb->sb, offset); |
| 778 | 897 | ||
| 898 | ret = ocfs2_inode_lock(inode, NULL, 0); | ||
| 899 | if (ret < 0) { | ||
| 900 | mlog_errno(ret); | ||
| 901 | goto clean_orphan; | ||
| 902 | } | ||
| 903 | |||
| 779 | ret = ocfs2_get_clusters(inode, v_cpos, &p_cpos, | 904 | ret = ocfs2_get_clusters(inode, v_cpos, &p_cpos, |
| 780 | &num_clusters, &ext_flags); | 905 | &num_clusters, &ext_flags); |
| 781 | if (ret < 0) { | 906 | if (ret < 0) { |
| 782 | mlog_errno(ret); | 907 | mlog_errno(ret); |
| 908 | ocfs2_inode_unlock(inode, 0); | ||
| 783 | goto clean_orphan; | 909 | goto clean_orphan; |
| 784 | } | 910 | } |
| 785 | 911 | ||
| @@ -787,9 +913,11 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb, | |||
| 787 | 913 | ||
| 788 | ret = blkdev_issue_zeroout(osb->sb->s_bdev, | 914 | ret = blkdev_issue_zeroout(osb->sb->s_bdev, |
| 789 | p_cpos << (osb->s_clustersize_bits - 9), | 915 | p_cpos << (osb->s_clustersize_bits - 9), |
| 790 | zero_len >> 9, GFP_KERNEL, false); | 916 | zero_len_head >> 9, GFP_NOFS, false); |
| 791 | if (ret < 0) | 917 | if (ret < 0) |
| 792 | mlog_errno(ret); | 918 | mlog_errno(ret); |
| 919 | |||
| 920 | ocfs2_inode_unlock(inode, 0); | ||
| 793 | } | 921 | } |
| 794 | 922 | ||
| 795 | clean_orphan: | 923 | clean_orphan: |
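The rewritten ocfs2 direct-I/O write path above splits the old single zero_len into a head length (how far the write offset sits into its cluster) and a tail length (the unzeroed remainder of the cluster holding i_size, capped at the gap being appended over), and clears those ranges with blkdev_issue_zeroout() under GFP_NOFS. The arithmetic leans on do_div(), which divides a u64 in place and returns the remainder. A simplified sketch of just that calculation, assuming an appending write (offset beyond i_size) and a power-of-two cluster size:

/* head/tail zero lengths for an appending direct write (sketch) */
static void calc_zero_lens(loff_t offset, loff_t isize, unsigned int clustersize,
			   u64 *zero_len_head, u64 *zero_len_tail)
{
	u64 o = offset;
	u64 s = isize;

	/* do_div() divides in place and hands back the remainder */
	*zero_len_head = do_div(o, clustersize);	/* offset's distance into its cluster */

	*zero_len_tail = clustersize - do_div(s, clustersize);	/* rest of i_size's cluster */
	if ((u64)(offset - isize) < *zero_len_tail)
		*zero_len_tail = offset - isize;	/* never past the region being written */
}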
diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h index 6cae155d54df..dd59599b022d 100644 --- a/fs/ocfs2/aops.h +++ b/fs/ocfs2/aops.h | |||
| @@ -22,7 +22,7 @@ | |||
| 22 | #ifndef OCFS2_AOPS_H | 22 | #ifndef OCFS2_AOPS_H |
| 23 | #define OCFS2_AOPS_H | 23 | #define OCFS2_AOPS_H |
| 24 | 24 | ||
| 25 | #include <linux/aio.h> | 25 | #include <linux/fs.h> |
| 26 | 26 | ||
| 27 | handle_t *ocfs2_start_walk_page_trans(struct inode *inode, | 27 | handle_t *ocfs2_start_walk_page_trans(struct inode *inode, |
| 28 | struct page *page, | 28 | struct page *page, |
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 16eff45727ee..8e19b9d7aba8 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c | |||
| @@ -1312,7 +1312,9 @@ static int o2hb_debug_init(void) | |||
| 1312 | int ret = -ENOMEM; | 1312 | int ret = -ENOMEM; |
| 1313 | 1313 | ||
| 1314 | o2hb_debug_dir = debugfs_create_dir(O2HB_DEBUG_DIR, NULL); | 1314 | o2hb_debug_dir = debugfs_create_dir(O2HB_DEBUG_DIR, NULL); |
| 1315 | if (!o2hb_debug_dir) { | 1315 | if (IS_ERR_OR_NULL(o2hb_debug_dir)) { |
| 1316 | ret = o2hb_debug_dir ? | ||
| 1317 | PTR_ERR(o2hb_debug_dir) : -ENOMEM; | ||
| 1316 | mlog_errno(ret); | 1318 | mlog_errno(ret); |
| 1317 | goto bail; | 1319 | goto bail; |
| 1318 | } | 1320 | } |
| @@ -1325,7 +1327,9 @@ static int o2hb_debug_init(void) | |||
| 1325 | sizeof(o2hb_live_node_bitmap), | 1327 | sizeof(o2hb_live_node_bitmap), |
| 1326 | O2NM_MAX_NODES, | 1328 | O2NM_MAX_NODES, |
| 1327 | o2hb_live_node_bitmap); | 1329 | o2hb_live_node_bitmap); |
| 1328 | if (!o2hb_debug_livenodes) { | 1330 | if (IS_ERR_OR_NULL(o2hb_debug_livenodes)) { |
| 1331 | ret = o2hb_debug_livenodes ? | ||
| 1332 | PTR_ERR(o2hb_debug_livenodes) : -ENOMEM; | ||
| 1329 | mlog_errno(ret); | 1333 | mlog_errno(ret); |
| 1330 | goto bail; | 1334 | goto bail; |
| 1331 | } | 1335 | } |
| @@ -1338,7 +1342,9 @@ static int o2hb_debug_init(void) | |||
| 1338 | sizeof(o2hb_live_region_bitmap), | 1342 | sizeof(o2hb_live_region_bitmap), |
| 1339 | O2NM_MAX_REGIONS, | 1343 | O2NM_MAX_REGIONS, |
| 1340 | o2hb_live_region_bitmap); | 1344 | o2hb_live_region_bitmap); |
| 1341 | if (!o2hb_debug_liveregions) { | 1345 | if (IS_ERR_OR_NULL(o2hb_debug_liveregions)) { |
| 1346 | ret = o2hb_debug_liveregions ? | ||
| 1347 | PTR_ERR(o2hb_debug_liveregions) : -ENOMEM; | ||
| 1342 | mlog_errno(ret); | 1348 | mlog_errno(ret); |
| 1343 | goto bail; | 1349 | goto bail; |
| 1344 | } | 1350 | } |
| @@ -1352,7 +1358,9 @@ static int o2hb_debug_init(void) | |||
| 1352 | sizeof(o2hb_quorum_region_bitmap), | 1358 | sizeof(o2hb_quorum_region_bitmap), |
| 1353 | O2NM_MAX_REGIONS, | 1359 | O2NM_MAX_REGIONS, |
| 1354 | o2hb_quorum_region_bitmap); | 1360 | o2hb_quorum_region_bitmap); |
| 1355 | if (!o2hb_debug_quorumregions) { | 1361 | if (IS_ERR_OR_NULL(o2hb_debug_quorumregions)) { |
| 1362 | ret = o2hb_debug_quorumregions ? | ||
| 1363 | PTR_ERR(o2hb_debug_quorumregions) : -ENOMEM; | ||
| 1356 | mlog_errno(ret); | 1364 | mlog_errno(ret); |
| 1357 | goto bail; | 1365 | goto bail; |
| 1358 | } | 1366 | } |
| @@ -1366,7 +1374,9 @@ static int o2hb_debug_init(void) | |||
| 1366 | sizeof(o2hb_failed_region_bitmap), | 1374 | sizeof(o2hb_failed_region_bitmap), |
| 1367 | O2NM_MAX_REGIONS, | 1375 | O2NM_MAX_REGIONS, |
| 1368 | o2hb_failed_region_bitmap); | 1376 | o2hb_failed_region_bitmap); |
| 1369 | if (!o2hb_debug_failedregions) { | 1377 | if (IS_ERR_OR_NULL(o2hb_debug_failedregions)) { |
| 1378 | ret = o2hb_debug_failedregions ? | ||
| 1379 | PTR_ERR(o2hb_debug_failedregions) : -ENOMEM; | ||
| 1370 | mlog_errno(ret); | 1380 | mlog_errno(ret); |
| 1371 | goto bail; | 1381 | goto bail; |
| 1372 | } | 1382 | } |
| @@ -2000,7 +2010,8 @@ static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir) | |||
| 2000 | 2010 | ||
| 2001 | reg->hr_debug_dir = | 2011 | reg->hr_debug_dir = |
| 2002 | debugfs_create_dir(config_item_name(&reg->hr_item), dir); | 2012 | debugfs_create_dir(config_item_name(&reg->hr_item), dir); |
| 2003 | if (!reg->hr_debug_dir) { | 2013 | if (IS_ERR_OR_NULL(reg->hr_debug_dir)) { |
| 2014 | ret = reg->hr_debug_dir ? PTR_ERR(reg->hr_debug_dir) : -ENOMEM; | ||
| 2004 | mlog_errno(ret); | 2015 | mlog_errno(ret); |
| 2005 | goto bail; | 2016 | goto bail; |
| 2006 | } | 2017 | } |
| @@ -2013,7 +2024,9 @@ static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir) | |||
| 2013 | O2HB_DB_TYPE_REGION_LIVENODES, | 2024 | O2HB_DB_TYPE_REGION_LIVENODES, |
| 2014 | sizeof(reg->hr_live_node_bitmap), | 2025 | sizeof(reg->hr_live_node_bitmap), |
| 2015 | O2NM_MAX_NODES, reg); | 2026 | O2NM_MAX_NODES, reg); |
| 2016 | if (!reg->hr_debug_livenodes) { | 2027 | if (IS_ERR_OR_NULL(reg->hr_debug_livenodes)) { |
| 2028 | ret = reg->hr_debug_livenodes ? | ||
| 2029 | PTR_ERR(reg->hr_debug_livenodes) : -ENOMEM; | ||
| 2017 | mlog_errno(ret); | 2030 | mlog_errno(ret); |
| 2018 | goto bail; | 2031 | goto bail; |
| 2019 | } | 2032 | } |
| @@ -2025,7 +2038,9 @@ static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir) | |||
| 2025 | sizeof(*(reg->hr_db_regnum)), | 2038 | sizeof(*(reg->hr_db_regnum)), |
| 2026 | O2HB_DB_TYPE_REGION_NUMBER, | 2039 | O2HB_DB_TYPE_REGION_NUMBER, |
| 2027 | 0, O2NM_MAX_NODES, reg); | 2040 | 0, O2NM_MAX_NODES, reg); |
| 2028 | if (!reg->hr_debug_regnum) { | 2041 | if (IS_ERR_OR_NULL(reg->hr_debug_regnum)) { |
| 2042 | ret = reg->hr_debug_regnum ? | ||
| 2043 | PTR_ERR(reg->hr_debug_regnum) : -ENOMEM; | ||
| 2029 | mlog_errno(ret); | 2044 | mlog_errno(ret); |
| 2030 | goto bail; | 2045 | goto bail; |
| 2031 | } | 2046 | } |
| @@ -2037,7 +2052,9 @@ static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir) | |||
| 2037 | sizeof(*(reg->hr_db_elapsed_time)), | 2052 | sizeof(*(reg->hr_db_elapsed_time)), |
| 2038 | O2HB_DB_TYPE_REGION_ELAPSED_TIME, | 2053 | O2HB_DB_TYPE_REGION_ELAPSED_TIME, |
| 2039 | 0, 0, reg); | 2054 | 0, 0, reg); |
| 2040 | if (!reg->hr_debug_elapsed_time) { | 2055 | if (IS_ERR_OR_NULL(reg->hr_debug_elapsed_time)) { |
| 2056 | ret = reg->hr_debug_elapsed_time ? | ||
| 2057 | PTR_ERR(reg->hr_debug_elapsed_time) : -ENOMEM; | ||
| 2041 | mlog_errno(ret); | 2058 | mlog_errno(ret); |
| 2042 | goto bail; | 2059 | goto bail; |
| 2043 | } | 2060 | } |
| @@ -2049,13 +2066,16 @@ static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir) | |||
| 2049 | sizeof(*(reg->hr_db_pinned)), | 2066 | sizeof(*(reg->hr_db_pinned)), |
| 2050 | O2HB_DB_TYPE_REGION_PINNED, | 2067 | O2HB_DB_TYPE_REGION_PINNED, |
| 2051 | 0, 0, reg); | 2068 | 0, 0, reg); |
| 2052 | if (!reg->hr_debug_pinned) { | 2069 | if (IS_ERR_OR_NULL(reg->hr_debug_pinned)) { |
| 2070 | ret = reg->hr_debug_pinned ? | ||
| 2071 | PTR_ERR(reg->hr_debug_pinned) : -ENOMEM; | ||
| 2053 | mlog_errno(ret); | 2072 | mlog_errno(ret); |
| 2054 | goto bail; | 2073 | goto bail; |
| 2055 | } | 2074 | } |
| 2056 | 2075 | ||
| 2057 | ret = 0; | 2076 | return 0; |
| 2058 | bail: | 2077 | bail: |
| 2078 | debugfs_remove_recursive(reg->hr_debug_dir); | ||
| 2059 | return ret; | 2079 | return ret; |
| 2060 | } | 2080 | } |
| 2061 | 2081 | ||
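The heartbeat.c hunks switch every debugfs creation check from `if (!dentry)` to IS_ERR_OR_NULL(), derive the errno from the returned dentry when one is available, and let the bail path call debugfs_remove_recursive() so a partially built tree is torn down. A condensed sketch of the same pattern outside ocfs2; the names are placeholders:

static struct dentry *foo_debug_dir;	/* hypothetical driver state */

static int foo_debug_init(const struct file_operations *fops)
{
	struct dentry *d;
	int ret;

	foo_debug_dir = debugfs_create_dir("foo", NULL);
	if (IS_ERR_OR_NULL(foo_debug_dir)) {
		/* NULL on plain failure, ERR_PTR when debugfs reports why */
		ret = foo_debug_dir ? PTR_ERR(foo_debug_dir) : -ENOMEM;
		goto bail;
	}

	d = debugfs_create_file("state", S_IRUSR, foo_debug_dir, NULL, fops);
	if (IS_ERR_OR_NULL(d)) {
		ret = d ? PTR_ERR(d) : -ENOMEM;
		goto bail;
	}

	return 0;
bail:
	/* tolerates NULL and IS_ERR dentries, so one teardown covers all paths */
	debugfs_remove_recursive(foo_debug_dir);
	return ret;
}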
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h index 2260fb9e6508..7fdc25a4d8c0 100644 --- a/fs/ocfs2/cluster/masklog.h +++ b/fs/ocfs2/cluster/masklog.h | |||
| @@ -196,13 +196,14 @@ extern struct mlog_bits mlog_and_bits, mlog_not_bits; | |||
| 196 | } \ | 196 | } \ |
| 197 | } while (0) | 197 | } while (0) |
| 198 | 198 | ||
| 199 | #define mlog_errno(st) do { \ | 199 | #define mlog_errno(st) ({ \ |
| 200 | int _st = (st); \ | 200 | int _st = (st); \ |
| 201 | if (_st != -ERESTARTSYS && _st != -EINTR && \ | 201 | if (_st != -ERESTARTSYS && _st != -EINTR && \ |
| 202 | _st != AOP_TRUNCATED_PAGE && _st != -ENOSPC && \ | 202 | _st != AOP_TRUNCATED_PAGE && _st != -ENOSPC && \ |
| 203 | _st != -EDQUOT) \ | 203 | _st != -EDQUOT) \ |
| 204 | mlog(ML_ERROR, "status = %lld\n", (long long)_st); \ | 204 | mlog(ML_ERROR, "status = %lld\n", (long long)_st); \ |
| 205 | } while (0) | 205 | _st; \ |
| 206 | }) | ||
| 206 | 207 | ||
| 207 | #define mlog_bug_on_msg(cond, fmt, args...) do { \ | 208 | #define mlog_bug_on_msg(cond, fmt, args...) do { \ |
| 208 | if (cond) { \ | 209 | if (cond) { \ |
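Turning mlog_errno() from a do/while block into a GCC statement expression (`({ ... })`) keeps the old logging behaviour but also yields the status value, so the macro can sit where an expression is expected, e.g. `return mlog_errno(ret);`. A stand-alone illustration with a hypothetical macro and helpers:

#define log_errno(st) ({					\
	int _st = (st);						\
	if (_st)						\
		pr_err("status = %d\n", _st);			\
	_st;	/* value of the whole ({ ... }) expression */	\
})

/* the macro can now be used in expression context */
static int foo_setup(struct foo *f)		/* hypothetical */
{
	int err = foo_reset(f);			/* hypothetical */

	if (err)
		return log_errno(err);
	return 0;
}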
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index b08050bd3f2e..ccd4dcfc3645 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c | |||
| @@ -18,7 +18,7 @@ | |||
| 18 | * | 18 | * |
| 19 | * linux/fs/minix/dir.c | 19 | * linux/fs/minix/dir.c |
| 20 | * | 20 | * |
| 21 | * Copyright (C) 1991, 1992 Linux Torvalds | 21 | * Copyright (C) 1991, 1992 Linus Torvalds |
| 22 | * | 22 | * |
| 23 | * This program is free software; you can redistribute it and/or | 23 | * This program is free software; you can redistribute it and/or |
| 24 | * modify it under the terms of the GNU General Public | 24 | * modify it under the terms of the GNU General Public |
| @@ -2047,22 +2047,19 @@ int ocfs2_check_dir_for_entry(struct inode *dir, | |||
| 2047 | const char *name, | 2047 | const char *name, |
| 2048 | int namelen) | 2048 | int namelen) |
| 2049 | { | 2049 | { |
| 2050 | int ret; | 2050 | int ret = 0; |
| 2051 | struct ocfs2_dir_lookup_result lookup = { NULL, }; | 2051 | struct ocfs2_dir_lookup_result lookup = { NULL, }; |
| 2052 | 2052 | ||
| 2053 | trace_ocfs2_check_dir_for_entry( | 2053 | trace_ocfs2_check_dir_for_entry( |
| 2054 | (unsigned long long)OCFS2_I(dir)->ip_blkno, namelen, name); | 2054 | (unsigned long long)OCFS2_I(dir)->ip_blkno, namelen, name); |
| 2055 | 2055 | ||
| 2056 | ret = -EEXIST; | 2056 | if (ocfs2_find_entry(name, namelen, dir, &lookup) == 0) { |
| 2057 | if (ocfs2_find_entry(name, namelen, dir, &lookup) == 0) | 2057 | ret = -EEXIST; |
| 2058 | goto bail; | 2058 | mlog_errno(ret); |
| 2059 | } | ||
| 2059 | 2060 | ||
| 2060 | ret = 0; | ||
| 2061 | bail: | ||
| 2062 | ocfs2_free_dir_lookup_result(&lookup); | 2061 | ocfs2_free_dir_lookup_result(&lookup); |
| 2063 | 2062 | ||
| 2064 | if (ret) | ||
| 2065 | mlog_errno(ret); | ||
| 2066 | return ret; | 2063 | return ret; |
| 2067 | } | 2064 | } |
| 2068 | 2065 | ||
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 11849a44dc5a..956edf67be20 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c | |||
| @@ -1391,6 +1391,11 @@ static int __ocfs2_cluster_lock(struct ocfs2_super *osb, | |||
| 1391 | int noqueue_attempted = 0; | 1391 | int noqueue_attempted = 0; |
| 1392 | int dlm_locked = 0; | 1392 | int dlm_locked = 0; |
| 1393 | 1393 | ||
| 1394 | if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED)) { | ||
| 1395 | mlog_errno(-EINVAL); | ||
| 1396 | return -EINVAL; | ||
| 1397 | } | ||
| 1398 | |||
| 1394 | ocfs2_init_mask_waiter(&mw); | 1399 | ocfs2_init_mask_waiter(&mw); |
| 1395 | 1400 | ||
| 1396 | if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) | 1401 | if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) |
| @@ -2954,7 +2959,7 @@ static int ocfs2_dlm_init_debug(struct ocfs2_super *osb) | |||
| 2954 | osb->osb_debug_root, | 2959 | osb->osb_debug_root, |
| 2955 | osb, | 2960 | osb, |
| 2956 | &ocfs2_dlm_debug_fops); | 2961 | &ocfs2_dlm_debug_fops); |
| 2957 | if (!dlm_debug->d_locking_state) { | 2962 | if (IS_ERR_OR_NULL(dlm_debug->d_locking_state)) { |
| 2958 | ret = -EINVAL; | 2963 | ret = -EINVAL; |
| 2959 | mlog(ML_ERROR, | 2964 | mlog(ML_ERROR, |
| 2960 | "Unable to create locking state debugfs file.\n"); | 2965 | "Unable to create locking state debugfs file.\n"); |
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c index 29651167190d..540dc4bdd042 100644 --- a/fs/ocfs2/export.c +++ b/fs/ocfs2/export.c | |||
| @@ -82,7 +82,6 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb, | |||
| 82 | } | 82 | } |
| 83 | 83 | ||
| 84 | status = ocfs2_test_inode_bit(osb, blkno, &set); | 84 | status = ocfs2_test_inode_bit(osb, blkno, &set); |
| 85 | trace_ocfs2_get_dentry_test_bit(status, set); | ||
| 86 | if (status < 0) { | 85 | if (status < 0) { |
| 87 | if (status == -EINVAL) { | 86 | if (status == -EINVAL) { |
| 88 | /* | 87 | /* |
| @@ -96,6 +95,7 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb, | |||
| 96 | goto unlock_nfs_sync; | 95 | goto unlock_nfs_sync; |
| 97 | } | 96 | } |
| 98 | 97 | ||
| 98 | trace_ocfs2_get_dentry_test_bit(status, set); | ||
| 99 | /* If the inode allocator bit is clear, this inode must be stale */ | 99 | /* If the inode allocator bit is clear, this inode must be stale */ |
| 100 | if (!set) { | 100 | if (!set) { |
| 101 | status = -ESTALE; | 101 | status = -ESTALE; |
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 46e0d4e857c7..91f03ce98108 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
| @@ -2280,7 +2280,7 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, | |||
| 2280 | file->f_path.dentry->d_name.name, | 2280 | file->f_path.dentry->d_name.name, |
| 2281 | (unsigned int)from->nr_segs); /* GRRRRR */ | 2281 | (unsigned int)from->nr_segs); /* GRRRRR */ |
| 2282 | 2282 | ||
| 2283 | if (iocb->ki_nbytes == 0) | 2283 | if (count == 0) |
| 2284 | return 0; | 2284 | return 0; |
| 2285 | 2285 | ||
| 2286 | appending = file->f_flags & O_APPEND ? 1 : 0; | 2286 | appending = file->f_flags & O_APPEND ? 1 : 0; |
| @@ -2330,8 +2330,7 @@ relock: | |||
| 2330 | } | 2330 | } |
| 2331 | 2331 | ||
| 2332 | can_do_direct = direct_io; | 2332 | can_do_direct = direct_io; |
| 2333 | ret = ocfs2_prepare_inode_for_write(file, ppos, | 2333 | ret = ocfs2_prepare_inode_for_write(file, ppos, count, appending, |
| 2334 | iocb->ki_nbytes, appending, | ||
| 2335 | &can_do_direct, &has_refcount); | 2334 | &can_do_direct, &has_refcount); |
| 2336 | if (ret < 0) { | 2335 | if (ret < 0) { |
| 2337 | mlog_errno(ret); | 2336 | mlog_errno(ret); |
| @@ -2339,8 +2338,7 @@ relock: | |||
| 2339 | } | 2338 | } |
| 2340 | 2339 | ||
| 2341 | if (direct_io && !is_sync_kiocb(iocb)) | 2340 | if (direct_io && !is_sync_kiocb(iocb)) |
| 2342 | unaligned_dio = ocfs2_is_io_unaligned(inode, iocb->ki_nbytes, | 2341 | unaligned_dio = ocfs2_is_io_unaligned(inode, count, *ppos); |
| 2343 | *ppos); | ||
| 2344 | 2342 | ||
| 2345 | /* | 2343 | /* |
| 2346 | * We can't complete the direct I/O as requested, fall back to | 2344 | * We can't complete the direct I/O as requested, fall back to |
| @@ -2394,7 +2392,6 @@ relock: | |||
| 2394 | /* | 2392 | /* |
| 2395 | * for completing the rest of the request. | 2393 | * for completing the rest of the request. |
| 2396 | */ | 2394 | */ |
| 2397 | *ppos += written; | ||
| 2398 | count -= written; | 2395 | count -= written; |
| 2399 | written_buffered = generic_perform_write(file, from, *ppos); | 2396 | written_buffered = generic_perform_write(file, from, *ppos); |
| 2400 | /* | 2397 | /* |
| @@ -2409,7 +2406,6 @@ relock: | |||
| 2409 | goto out_dio; | 2406 | goto out_dio; |
| 2410 | } | 2407 | } |
| 2411 | 2408 | ||
| 2412 | iocb->ki_pos = *ppos + written_buffered; | ||
| 2413 | /* We need to ensure that the page cache pages are written to | 2409 | /* We need to ensure that the page cache pages are written to |
| 2414 | * disk and invalidated to preserve the expected O_DIRECT | 2410 | * disk and invalidated to preserve the expected O_DIRECT |
| 2415 | * semantics. | 2411 | * semantics. |
| @@ -2418,6 +2414,7 @@ relock: | |||
| 2418 | ret = filemap_write_and_wait_range(file->f_mapping, *ppos, | 2414 | ret = filemap_write_and_wait_range(file->f_mapping, *ppos, |
| 2419 | endbyte); | 2415 | endbyte); |
| 2420 | if (ret == 0) { | 2416 | if (ret == 0) { |
| 2417 | iocb->ki_pos = *ppos + written_buffered; | ||
| 2421 | written += written_buffered; | 2418 | written += written_buffered; |
| 2422 | invalidate_mapping_pages(mapping, | 2419 | invalidate_mapping_pages(mapping, |
| 2423 | *ppos >> PAGE_CACHE_SHIFT, | 2420 | *ppos >> PAGE_CACHE_SHIFT, |
| @@ -2440,10 +2437,14 @@ out_dio: | |||
| 2440 | /* buffered aio wouldn't have proper lock coverage today */ | 2437 | /* buffered aio wouldn't have proper lock coverage today */ |
| 2441 | BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT)); | 2438 | BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT)); |
| 2442 | 2439 | ||
| 2440 | if (unlikely(written <= 0)) | ||
| 2441 | goto no_sync; | ||
| 2442 | |||
| 2443 | if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) || | 2443 | if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) || |
| 2444 | ((file->f_flags & O_DIRECT) && !direct_io)) { | 2444 | ((file->f_flags & O_DIRECT) && !direct_io)) { |
| 2445 | ret = filemap_fdatawrite_range(file->f_mapping, *ppos, | 2445 | ret = filemap_fdatawrite_range(file->f_mapping, |
| 2446 | *ppos + count - 1); | 2446 | iocb->ki_pos - written, |
| 2447 | iocb->ki_pos - 1); | ||
| 2447 | if (ret < 0) | 2448 | if (ret < 0) |
| 2448 | written = ret; | 2449 | written = ret; |
| 2449 | 2450 | ||
| @@ -2454,10 +2455,12 @@ out_dio: | |||
| 2454 | } | 2455 | } |
| 2455 | 2456 | ||
| 2456 | if (!ret) | 2457 | if (!ret) |
| 2457 | ret = filemap_fdatawait_range(file->f_mapping, *ppos, | 2458 | ret = filemap_fdatawait_range(file->f_mapping, |
| 2458 | *ppos + count - 1); | 2459 | iocb->ki_pos - written, |
| 2460 | iocb->ki_pos - 1); | ||
| 2459 | } | 2461 | } |
| 2460 | 2462 | ||
| 2463 | no_sync: | ||
| 2461 | /* | 2464 | /* |
| 2462 | * deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io | 2465 | * deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io |
| 2463 | * function pointer which is called when o_direct io completes so that | 2466 | * function pointer which is called when o_direct io completes so that |
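The ocfs2_file_write_iter() fixes above stop adjusting *ppos and iocb->ki_pos by hand (the buffered and direct write helpers already advance ki_pos), skip the sync step entirely when nothing was written, and compute the flush window from the bytes actually written rather than from the requested count, i.e. [iocb->ki_pos - written, iocb->ki_pos - 1]. A sketch of that final flush-and-wait step, assuming `written` is the byte count the write really produced:

/* flush and wait on exactly the bytes this write produced (sketch) */
static int sync_written_range(struct file *file, struct kiocb *iocb,
			      ssize_t written)
{
	loff_t start = iocb->ki_pos - written;
	loff_t end = iocb->ki_pos - 1;
	int ret;

	if (written <= 0)
		return 0;	/* nothing reached the page cache or disk */

	ret = filemap_fdatawrite_range(file->f_mapping, start, end);
	if (!ret)
		ret = filemap_fdatawait_range(file->f_mapping, start, end);
	return ret;
}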
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 3025c0da6b8a..be71ca0937f7 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c | |||
| @@ -624,7 +624,7 @@ static int ocfs2_remove_inode(struct inode *inode, | |||
| 624 | ocfs2_get_system_file_inode(osb, INODE_ALLOC_SYSTEM_INODE, | 624 | ocfs2_get_system_file_inode(osb, INODE_ALLOC_SYSTEM_INODE, |
| 625 | le16_to_cpu(di->i_suballoc_slot)); | 625 | le16_to_cpu(di->i_suballoc_slot)); |
| 626 | if (!inode_alloc_inode) { | 626 | if (!inode_alloc_inode) { |
| 627 | status = -EEXIST; | 627 | status = -ENOENT; |
| 628 | mlog_errno(status); | 628 | mlog_errno(status); |
| 629 | goto bail; | 629 | goto bail; |
| 630 | } | 630 | } |
| @@ -742,7 +742,7 @@ static int ocfs2_wipe_inode(struct inode *inode, | |||
| 742 | ORPHAN_DIR_SYSTEM_INODE, | 742 | ORPHAN_DIR_SYSTEM_INODE, |
| 743 | orphaned_slot); | 743 | orphaned_slot); |
| 744 | if (!orphan_dir_inode) { | 744 | if (!orphan_dir_inode) { |
| 745 | status = -EEXIST; | 745 | status = -ENOENT; |
| 746 | mlog_errno(status); | 746 | mlog_errno(status); |
| 747 | goto bail; | 747 | goto bail; |
| 748 | } | 748 | } |
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index 044013455621..857bbbcd39f3 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c | |||
| @@ -666,7 +666,7 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb, | |||
| 666 | if (le32_to_cpu(alloc->id1.bitmap1.i_used) != | 666 | if (le32_to_cpu(alloc->id1.bitmap1.i_used) != |
| 667 | ocfs2_local_alloc_count_bits(alloc)) { | 667 | ocfs2_local_alloc_count_bits(alloc)) { |
| 668 | ocfs2_error(osb->sb, "local alloc inode %llu says it has " | 668 | ocfs2_error(osb->sb, "local alloc inode %llu says it has " |
| 669 | "%u free bits, but a count shows %u", | 669 | "%u used bits, but a count shows %u", |
| 670 | (unsigned long long)le64_to_cpu(alloc->i_blkno), | 670 | (unsigned long long)le64_to_cpu(alloc->i_blkno), |
| 671 | le32_to_cpu(alloc->id1.bitmap1.i_used), | 671 | le32_to_cpu(alloc->id1.bitmap1.i_used), |
| 672 | ocfs2_local_alloc_count_bits(alloc)); | 672 | ocfs2_local_alloc_count_bits(alloc)); |
| @@ -839,7 +839,7 @@ static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb, | |||
| 839 | u32 *numbits, | 839 | u32 *numbits, |
| 840 | struct ocfs2_alloc_reservation *resv) | 840 | struct ocfs2_alloc_reservation *resv) |
| 841 | { | 841 | { |
| 842 | int numfound, bitoff, left, startoff, lastzero; | 842 | int numfound = 0, bitoff, left, startoff, lastzero; |
| 843 | int local_resv = 0; | 843 | int local_resv = 0; |
| 844 | struct ocfs2_alloc_reservation r; | 844 | struct ocfs2_alloc_reservation r; |
| 845 | void *bitmap = NULL; | 845 | void *bitmap = NULL; |
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index b5c3a5ea3ee6..09f90cbf0e24 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c | |||
| @@ -2322,10 +2322,10 @@ int ocfs2_orphan_del(struct ocfs2_super *osb, | |||
| 2322 | 2322 | ||
| 2323 | trace_ocfs2_orphan_del( | 2323 | trace_ocfs2_orphan_del( |
| 2324 | (unsigned long long)OCFS2_I(orphan_dir_inode)->ip_blkno, | 2324 | (unsigned long long)OCFS2_I(orphan_dir_inode)->ip_blkno, |
| 2325 | name, namelen); | 2325 | name, strlen(name)); |
| 2326 | 2326 | ||
| 2327 | /* find it's spot in the orphan directory */ | 2327 | /* find it's spot in the orphan directory */ |
| 2328 | status = ocfs2_find_entry(name, namelen, orphan_dir_inode, | 2328 | status = ocfs2_find_entry(name, strlen(name), orphan_dir_inode, |
| 2329 | &lookup); | 2329 | &lookup); |
| 2330 | if (status) { | 2330 | if (status) { |
| 2331 | mlog_errno(status); | 2331 | mlog_errno(status); |
| @@ -2808,7 +2808,7 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir, | |||
| 2808 | ORPHAN_DIR_SYSTEM_INODE, | 2808 | ORPHAN_DIR_SYSTEM_INODE, |
| 2809 | osb->slot_num); | 2809 | osb->slot_num); |
| 2810 | if (!orphan_dir_inode) { | 2810 | if (!orphan_dir_inode) { |
| 2811 | status = -EEXIST; | 2811 | status = -ENOENT; |
| 2812 | mlog_errno(status); | 2812 | mlog_errno(status); |
| 2813 | goto leave; | 2813 | goto leave; |
| 2814 | } | 2814 | } |
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index ee541f92dab4..df3a500789c7 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c | |||
| @@ -4276,7 +4276,7 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir, | |||
| 4276 | error = posix_acl_create(dir, &mode, &default_acl, &acl); | 4276 | error = posix_acl_create(dir, &mode, &default_acl, &acl); |
| 4277 | if (error) { | 4277 | if (error) { |
| 4278 | mlog_errno(error); | 4278 | mlog_errno(error); |
| 4279 | goto out; | 4279 | return error; |
| 4280 | } | 4280 | } |
| 4281 | 4281 | ||
| 4282 | error = ocfs2_create_inode_in_orphan(dir, mode, | 4282 | error = ocfs2_create_inode_in_orphan(dir, mode, |
diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c index d5493e361a38..e78a203d44c8 100644 --- a/fs/ocfs2/slot_map.c +++ b/fs/ocfs2/slot_map.c | |||
| @@ -427,7 +427,7 @@ int ocfs2_init_slot_info(struct ocfs2_super *osb) | |||
| 427 | if (!si) { | 427 | if (!si) { |
| 428 | status = -ENOMEM; | 428 | status = -ENOMEM; |
| 429 | mlog_errno(status); | 429 | mlog_errno(status); |
| 430 | goto bail; | 430 | return status; |
| 431 | } | 431 | } |
| 432 | 432 | ||
| 433 | si->si_extended = ocfs2_uses_extended_slot_map(osb); | 433 | si->si_extended = ocfs2_uses_extended_slot_map(osb); |
| @@ -452,7 +452,7 @@ int ocfs2_init_slot_info(struct ocfs2_super *osb) | |||
| 452 | 452 | ||
| 453 | osb->slot_info = (struct ocfs2_slot_info *)si; | 453 | osb->slot_info = (struct ocfs2_slot_info *)si; |
| 454 | bail: | 454 | bail: |
| 455 | if (status < 0 && si) | 455 | if (status < 0) |
| 456 | __ocfs2_free_slot_info(si); | 456 | __ocfs2_free_slot_info(si); |
| 457 | 457 | ||
| 458 | return status; | 458 | return status; |
diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c index 1724d43d3da1..220cae7bbdbc 100644 --- a/fs/ocfs2/stack_o2cb.c +++ b/fs/ocfs2/stack_o2cb.c | |||
| @@ -295,7 +295,7 @@ static int o2cb_cluster_check(void) | |||
| 295 | set_bit(node_num, netmap); | 295 | set_bit(node_num, netmap); |
| 296 | if (!memcmp(hbmap, netmap, sizeof(hbmap))) | 296 | if (!memcmp(hbmap, netmap, sizeof(hbmap))) |
| 297 | return 0; | 297 | return 0; |
| 298 | if (i < O2CB_MAP_STABILIZE_COUNT) | 298 | if (i < O2CB_MAP_STABILIZE_COUNT - 1) |
| 299 | msleep(1000); | 299 | msleep(1000); |
| 300 | } | 300 | } |
| 301 | 301 | ||
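The stack_o2cb.c change is an off-by-one fix in a retry loop: comparing against O2CB_MAP_STABILIZE_COUNT meant the code slept a full second after its final comparison even though no further retry would follow; comparing against the count minus one sleeps only between attempts. The general shape, with hypothetical names:

#define MAP_STABILIZE_COUNT 60			/* hypothetical limit */

static int wait_for_maps_to_settle(void)
{
	int i;

	for (i = 0; i < MAP_STABILIZE_COUNT; i++) {
		if (maps_agree())		/* hypothetical check */
			return 0;
		if (i < MAP_STABILIZE_COUNT - 1)	/* no sleep after the last attempt */
			msleep(1000);
	}
	return -ETIMEDOUT;
}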
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c index 720aa389e0ea..2768eb1da2b8 100644 --- a/fs/ocfs2/stack_user.c +++ b/fs/ocfs2/stack_user.c | |||
| @@ -1004,10 +1004,8 @@ static int user_cluster_connect(struct ocfs2_cluster_connection *conn) | |||
| 1004 | BUG_ON(conn == NULL); | 1004 | BUG_ON(conn == NULL); |
| 1005 | 1005 | ||
| 1006 | lc = kzalloc(sizeof(struct ocfs2_live_connection), GFP_KERNEL); | 1006 | lc = kzalloc(sizeof(struct ocfs2_live_connection), GFP_KERNEL); |
| 1007 | if (!lc) { | 1007 | if (!lc) |
| 1008 | rc = -ENOMEM; | 1008 | return -ENOMEM; |
| 1009 | goto out; | ||
| 1010 | } | ||
| 1011 | 1009 | ||
| 1012 | init_waitqueue_head(&lc->oc_wait); | 1010 | init_waitqueue_head(&lc->oc_wait); |
| 1013 | init_completion(&lc->oc_sync_wait); | 1011 | init_completion(&lc->oc_sync_wait); |
| @@ -1063,7 +1061,7 @@ static int user_cluster_connect(struct ocfs2_cluster_connection *conn) | |||
| 1063 | } | 1061 | } |
| 1064 | 1062 | ||
| 1065 | out: | 1063 | out: |
| 1066 | if (rc && lc) | 1064 | if (rc) |
| 1067 | kfree(lc); | 1065 | kfree(lc); |
| 1068 | return rc; | 1066 | return rc; |
| 1069 | } | 1067 | } |
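The stack_user.c cleanup relies on kfree() being a no-op on NULL, and slot_map.c now returns early on allocation failure so its later NULL check becomes redundant; both therefore drop the `if (ptr)` guards around the free. A small illustration with a hypothetical context structure:

static void foo_ctx_free(struct foo_ctx *c)	/* hypothetical type */
{
	if (!c)
		return;
	/* any member may still be NULL; kfree(NULL) is a no-op, so no guards */
	kfree(c->name);
	kfree(c->bitmap);
	kfree(c);
}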
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 0cb889a17ae1..4479029630bb 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c | |||
| @@ -2499,6 +2499,8 @@ static int _ocfs2_free_suballoc_bits(handle_t *handle, | |||
| 2499 | alloc_bh, OCFS2_JOURNAL_ACCESS_WRITE); | 2499 | alloc_bh, OCFS2_JOURNAL_ACCESS_WRITE); |
| 2500 | if (status < 0) { | 2500 | if (status < 0) { |
| 2501 | mlog_errno(status); | 2501 | mlog_errno(status); |
| 2502 | ocfs2_block_group_set_bits(handle, alloc_inode, group, group_bh, | ||
| 2503 | start_bit, count); | ||
| 2502 | goto bail; | 2504 | goto bail; |
| 2503 | } | 2505 | } |
| 2504 | 2506 | ||
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 26675185b886..837ddce4b659 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
| @@ -1112,7 +1112,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) | |||
| 1112 | 1112 | ||
| 1113 | osb->osb_debug_root = debugfs_create_dir(osb->uuid_str, | 1113 | osb->osb_debug_root = debugfs_create_dir(osb->uuid_str, |
| 1114 | ocfs2_debugfs_root); | 1114 | ocfs2_debugfs_root); |
| 1115 | if (!osb->osb_debug_root) { | 1115 | if (IS_ERR_OR_NULL(osb->osb_debug_root)) { |
| 1116 | status = -EINVAL; | 1116 | status = -EINVAL; |
| 1117 | mlog(ML_ERROR, "Unable to create per-mount debugfs root.\n"); | 1117 | mlog(ML_ERROR, "Unable to create per-mount debugfs root.\n"); |
| 1118 | goto read_super_error; | 1118 | goto read_super_error; |
| @@ -1122,7 +1122,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) | |||
| 1122 | osb->osb_debug_root, | 1122 | osb->osb_debug_root, |
| 1123 | osb, | 1123 | osb, |
| 1124 | &ocfs2_osb_debug_fops); | 1124 | &ocfs2_osb_debug_fops); |
| 1125 | if (!osb->osb_ctxt) { | 1125 | if (IS_ERR_OR_NULL(osb->osb_ctxt)) { |
| 1126 | status = -EINVAL; | 1126 | status = -EINVAL; |
| 1127 | mlog_errno(status); | 1127 | mlog_errno(status); |
| 1128 | goto read_super_error; | 1128 | goto read_super_error; |
| @@ -1606,8 +1606,9 @@ static int __init ocfs2_init(void) | |||
| 1606 | } | 1606 | } |
| 1607 | 1607 | ||
| 1608 | ocfs2_debugfs_root = debugfs_create_dir("ocfs2", NULL); | 1608 | ocfs2_debugfs_root = debugfs_create_dir("ocfs2", NULL); |
| 1609 | if (!ocfs2_debugfs_root) { | 1609 | if (IS_ERR_OR_NULL(ocfs2_debugfs_root)) { |
| 1610 | status = -ENOMEM; | 1610 | status = ocfs2_debugfs_root ? |
| 1611 | PTR_ERR(ocfs2_debugfs_root) : -ENOMEM; | ||
| 1611 | mlog(ML_ERROR, "Unable to create ocfs2 debugfs root.\n"); | 1612 | mlog(ML_ERROR, "Unable to create ocfs2 debugfs root.\n"); |
| 1612 | goto out4; | 1613 | goto out4; |
| 1613 | } | 1614 | } |
| @@ -2069,6 +2070,8 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
| 2069 | cbits = le32_to_cpu(di->id2.i_super.s_clustersize_bits); | 2070 | cbits = le32_to_cpu(di->id2.i_super.s_clustersize_bits); |
| 2070 | bbits = le32_to_cpu(di->id2.i_super.s_blocksize_bits); | 2071 | bbits = le32_to_cpu(di->id2.i_super.s_blocksize_bits); |
| 2071 | sb->s_maxbytes = ocfs2_max_file_offset(bbits, cbits); | 2072 | sb->s_maxbytes = ocfs2_max_file_offset(bbits, cbits); |
| 2073 | memcpy(sb->s_uuid, di->id2.i_super.s_uuid, | ||
| 2074 | sizeof(di->id2.i_super.s_uuid)); | ||
| 2072 | 2075 | ||
| 2073 | osb->osb_dx_mask = (1 << (cbits - bbits)) - 1; | 2076 | osb->osb_dx_mask = (1 << (cbits - bbits)) - 1; |
| 2074 | 2077 | ||
| @@ -2333,7 +2336,7 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
| 2333 | mlog_errno(status); | 2336 | mlog_errno(status); |
| 2334 | goto bail; | 2337 | goto bail; |
| 2335 | } | 2338 | } |
| 2336 | cleancache_init_shared_fs((char *)&di->id2.i_super.s_uuid, sb); | 2339 | cleancache_init_shared_fs(sb); |
| 2337 | 2340 | ||
| 2338 | bail: | 2341 | bail: |
| 2339 | return status; | 2342 | return status; |
| @@ -2563,22 +2566,22 @@ static void ocfs2_handle_error(struct super_block *sb) | |||
| 2563 | ocfs2_set_ro_flag(osb, 0); | 2566 | ocfs2_set_ro_flag(osb, 0); |
| 2564 | } | 2567 | } |
| 2565 | 2568 | ||
| 2566 | static char error_buf[1024]; | 2569 | void __ocfs2_error(struct super_block *sb, const char *function, |
| 2567 | 2570 | const char *fmt, ...) | |
| 2568 | void __ocfs2_error(struct super_block *sb, | ||
| 2569 | const char *function, | ||
| 2570 | const char *fmt, ...) | ||
| 2571 | { | 2571 | { |
| 2572 | struct va_format vaf; | ||
| 2572 | va_list args; | 2573 | va_list args; |
| 2573 | 2574 | ||
| 2574 | va_start(args, fmt); | 2575 | va_start(args, fmt); |
| 2575 | vsnprintf(error_buf, sizeof(error_buf), fmt, args); | 2576 | vaf.fmt = fmt; |
| 2576 | va_end(args); | 2577 | vaf.va = &args; |
| 2577 | 2578 | ||
| 2578 | /* Not using mlog here because we want to show the actual | 2579 | /* Not using mlog here because we want to show the actual |
| 2579 | * function the error came from. */ | 2580 | * function the error came from. */ |
| 2580 | printk(KERN_CRIT "OCFS2: ERROR (device %s): %s: %s\n", | 2581 | printk(KERN_CRIT "OCFS2: ERROR (device %s): %s: %pV\n", |
| 2581 | sb->s_id, function, error_buf); | 2582 | sb->s_id, function, &vaf); |
| 2583 | |||
| 2584 | va_end(args); | ||
| 2582 | 2585 | ||
| 2583 | ocfs2_handle_error(sb); | 2586 | ocfs2_handle_error(sb); |
| 2584 | } | 2587 | } |
| @@ -2586,18 +2589,21 @@ void __ocfs2_error(struct super_block *sb, | |||
| 2586 | /* Handle critical errors. This is intentionally more drastic than | 2589 | /* Handle critical errors. This is intentionally more drastic than |
| 2587 | * ocfs2_handle_error, so we only use for things like journal errors, | 2590 | * ocfs2_handle_error, so we only use for things like journal errors, |
| 2588 | * etc. */ | 2591 | * etc. */ |
| 2589 | void __ocfs2_abort(struct super_block* sb, | 2592 | void __ocfs2_abort(struct super_block *sb, const char *function, |
| 2590 | const char *function, | ||
| 2591 | const char *fmt, ...) | 2593 | const char *fmt, ...) |
| 2592 | { | 2594 | { |
| 2595 | struct va_format vaf; | ||
| 2593 | va_list args; | 2596 | va_list args; |
| 2594 | 2597 | ||
| 2595 | va_start(args, fmt); | 2598 | va_start(args, fmt); |
| 2596 | vsnprintf(error_buf, sizeof(error_buf), fmt, args); | ||
| 2597 | va_end(args); | ||
| 2598 | 2599 | ||
| 2599 | printk(KERN_CRIT "OCFS2: abort (device %s): %s: %s\n", | 2600 | vaf.fmt = fmt; |
| 2600 | sb->s_id, function, error_buf); | 2601 | vaf.va = &args; |
| 2602 | |||
| 2603 | printk(KERN_CRIT "OCFS2: abort (device %s): %s: %pV\n", | ||
| 2604 | sb->s_id, function, &vaf); | ||
| 2605 | |||
| 2606 | va_end(args); | ||
| 2601 | 2607 | ||
| 2602 | /* We don't have the cluster support yet to go straight to | 2608 | /* We don't have the cluster support yet to go straight to |
| 2603 | * hard readonly in here. Until then, we want to keep | 2609 | * hard readonly in here. Until then, we want to keep |
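The __ocfs2_error()/__ocfs2_abort() rewrite drops the shared static error_buf, which concurrent callers could scribble over, and instead hands the caller's format and va_list to printk() via %pV and struct va_format; note that va_end() now comes after the printk, because the va_list must stay valid while %pV consumes it. The same pattern in isolation:

static void my_report(const char *prefix, const char *fmt, ...)
{
	struct va_format vaf;
	va_list args;

	va_start(args, fmt);
	vaf.fmt = fmt;
	vaf.va = &args;
	/* %pV expands the caller's format+args inside this single printk */
	printk(KERN_CRIT "%s: %pV\n", prefix, &vaf);
	va_end(args);	/* only after %pV has been consumed */
}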
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 85b190dc132f..4ca7533be479 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c | |||
| @@ -1238,6 +1238,10 @@ static int ocfs2_xattr_block_get(struct inode *inode, | |||
| 1238 | i, | 1238 | i, |
| 1239 | &block_off, | 1239 | &block_off, |
| 1240 | &name_offset); | 1240 | &name_offset); |
| 1241 | if (ret) { | ||
| 1242 | mlog_errno(ret); | ||
| 1243 | goto cleanup; | ||
| 1244 | } | ||
| 1241 | xs->base = bucket_block(xs->bucket, block_off); | 1245 | xs->base = bucket_block(xs->bucket, block_off); |
| 1242 | } | 1246 | } |
| 1243 | if (ocfs2_xattr_is_local(xs->here)) { | 1247 | if (ocfs2_xattr_is_local(xs->here)) { |
| @@ -5665,6 +5669,10 @@ static int ocfs2_delete_xattr_in_bucket(struct inode *inode, | |||
| 5665 | 5669 | ||
| 5666 | ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket, | 5670 | ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket, |
| 5667 | i, &xv, NULL); | 5671 | i, &xv, NULL); |
| 5672 | if (ret) { | ||
| 5673 | mlog_errno(ret); | ||
| 5674 | break; | ||
| 5675 | } | ||
| 5668 | 5676 | ||
| 5669 | ret = ocfs2_lock_xattr_remove_allocators(inode, xv, | 5677 | ret = ocfs2_lock_xattr_remove_allocators(inode, xv, |
| 5670 | args->ref_ci, | 5678 | args->ref_ci, |
| @@ -570,6 +570,7 @@ static int chown_common(struct path *path, uid_t user, gid_t group) | |||
| 570 | uid = make_kuid(current_user_ns(), user); | 570 | uid = make_kuid(current_user_ns(), user); |
| 571 | gid = make_kgid(current_user_ns(), group); | 571 | gid = make_kgid(current_user_ns(), group); |
| 572 | 572 | ||
| 573 | retry_deleg: | ||
| 573 | newattrs.ia_valid = ATTR_CTIME; | 574 | newattrs.ia_valid = ATTR_CTIME; |
| 574 | if (user != (uid_t) -1) { | 575 | if (user != (uid_t) -1) { |
| 575 | if (!uid_valid(uid)) | 576 | if (!uid_valid(uid)) |
| @@ -586,7 +587,6 @@ static int chown_common(struct path *path, uid_t user, gid_t group) | |||
| 586 | if (!S_ISDIR(inode->i_mode)) | 587 | if (!S_ISDIR(inode->i_mode)) |
| 587 | newattrs.ia_valid |= | 588 | newattrs.ia_valid |= |
| 588 | ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV; | 589 | ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV; |
| 589 | retry_deleg: | ||
| 590 | mutex_lock(&inode->i_mutex); | 590 | mutex_lock(&inode->i_mutex); |
| 591 | error = security_path_chown(path, uid, gid); | 591 | error = security_path_chown(path, uid, gid); |
| 592 | if (!error) | 592 | if (!error) |
| @@ -988,9 +988,6 @@ struct file *file_open_root(struct dentry *dentry, struct vfsmount *mnt, | |||
| 988 | return ERR_PTR(err); | 988 | return ERR_PTR(err); |
| 989 | if (flags & O_CREAT) | 989 | if (flags & O_CREAT) |
| 990 | return ERR_PTR(-EINVAL); | 990 | return ERR_PTR(-EINVAL); |
| 991 | if (!filename && (flags & O_DIRECTORY)) | ||
| 992 | if (!dentry->d_inode->i_op->lookup) | ||
| 993 | return ERR_PTR(-ENOTDIR); | ||
| 994 | return do_file_open_root(dentry, mnt, filename, &op); | 991 | return do_file_open_root(dentry, mnt, filename, &op); |
| 995 | } | 992 | } |
| 996 | EXPORT_SYMBOL(file_open_root); | 993 | EXPORT_SYMBOL(file_open_root); |
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index b90952f528b1..5f0d1993e6e3 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c | |||
| @@ -529,8 +529,7 @@ static int ovl_remount(struct super_block *sb, int *flags, char *data) | |||
| 529 | { | 529 | { |
| 530 | struct ovl_fs *ufs = sb->s_fs_info; | 530 | struct ovl_fs *ufs = sb->s_fs_info; |
| 531 | 531 | ||
| 532 | if (!(*flags & MS_RDONLY) && | 532 | if (!(*flags & MS_RDONLY) && !ufs->upper_mnt) |
| 533 | (!ufs->upper_mnt || (ufs->upper_mnt->mnt_sb->s_flags & MS_RDONLY))) | ||
| 534 | return -EROFS; | 533 | return -EROFS; |
| 535 | 534 | ||
| 536 | return 0; | 535 | return 0; |
| @@ -615,9 +614,19 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config) | |||
| 615 | break; | 614 | break; |
| 616 | 615 | ||
| 617 | default: | 616 | default: |
| 617 | pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p); | ||
| 618 | return -EINVAL; | 618 | return -EINVAL; |
| 619 | } | 619 | } |
| 620 | } | 620 | } |
| 621 | |||
| 622 | /* Workdir is useless in non-upper mount */ | ||
| 623 | if (!config->upperdir && config->workdir) { | ||
| 624 | pr_info("overlayfs: option \"workdir=%s\" is useless in a non-upper mount, ignore\n", | ||
| 625 | config->workdir); | ||
| 626 | kfree(config->workdir); | ||
| 627 | config->workdir = NULL; | ||
| 628 | } | ||
| 629 | |||
| 621 | return 0; | 630 | return 0; |
| 622 | } | 631 | } |
| 623 | 632 | ||
| @@ -837,7 +846,6 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) | |||
| 837 | 846 | ||
| 838 | sb->s_stack_depth = 0; | 847 | sb->s_stack_depth = 0; |
| 839 | if (ufs->config.upperdir) { | 848 | if (ufs->config.upperdir) { |
| 840 | /* FIXME: workdir is not needed for a R/O mount */ | ||
| 841 | if (!ufs->config.workdir) { | 849 | if (!ufs->config.workdir) { |
| 842 | pr_err("overlayfs: missing 'workdir'\n"); | 850 | pr_err("overlayfs: missing 'workdir'\n"); |
| 843 | goto out_free_config; | 851 | goto out_free_config; |
| @@ -847,6 +855,13 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) | |||
| 847 | if (err) | 855 | if (err) |
| 848 | goto out_free_config; | 856 | goto out_free_config; |
| 849 | 857 | ||
| 858 | /* Upper fs should not be r/o */ | ||
| 859 | if (upperpath.mnt->mnt_sb->s_flags & MS_RDONLY) { | ||
| 860 | pr_err("overlayfs: upper fs is r/o, try multi-lower layers mount\n"); | ||
| 861 | err = -EINVAL; | ||
| 862 | goto out_put_upperpath; | ||
| 863 | } | ||
| 864 | |||
| 850 | err = ovl_mount_dir(ufs->config.workdir, &workpath); | 865 | err = ovl_mount_dir(ufs->config.workdir, &workpath); |
| 851 | if (err) | 866 | if (err) |
| 852 | goto out_put_upperpath; | 867 | goto out_put_upperpath; |
| @@ -869,8 +884,14 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) | |||
| 869 | 884 | ||
| 870 | err = -EINVAL; | 885 | err = -EINVAL; |
| 871 | stacklen = ovl_split_lowerdirs(lowertmp); | 886 | stacklen = ovl_split_lowerdirs(lowertmp); |
| 872 | if (stacklen > OVL_MAX_STACK) | 887 | if (stacklen > OVL_MAX_STACK) { |
| 888 | pr_err("overlayfs: too many lower directries, limit is %d\n", | ||
| 889 | OVL_MAX_STACK); | ||
| 873 | goto out_free_lowertmp; | 890 | goto out_free_lowertmp; |
| 891 | } else if (!ufs->config.upperdir && stacklen == 1) { | ||
| 892 | pr_err("overlayfs: at least 2 lowerdir are needed while upperdir nonexistent\n"); | ||
| 893 | goto out_free_lowertmp; | ||
| 894 | } | ||
| 874 | 895 | ||
| 875 | stack = kcalloc(stacklen, sizeof(struct path), GFP_KERNEL); | 896 | stack = kcalloc(stacklen, sizeof(struct path), GFP_KERNEL); |
| 876 | if (!stack) | 897 | if (!stack) |
| @@ -932,8 +953,8 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) | |||
| 932 | ufs->numlower++; | 953 | ufs->numlower++; |
| 933 | } | 954 | } |
| 934 | 955 | ||
| 935 | /* If the upper fs is r/o or nonexistent, we mark overlayfs r/o too */ | 956 | /* If the upper fs is nonexistent, we mark overlayfs r/o too */ |
| 936 | if (!ufs->upper_mnt || (ufs->upper_mnt->mnt_sb->s_flags & MS_RDONLY)) | 957 | if (!ufs->upper_mnt) |
| 937 | sb->s_flags |= MS_RDONLY; | 958 | sb->s_flags |= MS_RDONLY; |
| 938 | 959 | ||
| 939 | sb->s_d_op = &ovl_dentry_operations; | 960 | sb->s_d_op = &ovl_dentry_operations; |
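
The overlayfs changes above let an overlay be mounted with only lower layers: a read-only upper fs is now rejected outright, and when no upperdir is given at least two lowerdirs are required and the mount is forced read-only. A minimal user-space sketch of such a lower-only mount, assuming the hypothetical paths /lower1, /lower2 and /mnt/merged already exist:

#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
	/* Lower-only overlay: two read-only layers, no upperdir/workdir.
	 * /lower1, /lower2 and /mnt/merged are hypothetical paths. */
	if (mount("overlay", "/mnt/merged", "overlay", MS_RDONLY,
		  "lowerdir=/lower1:/lower2") != 0) {
		perror("mount");
		return 1;
	}
	return 0;
}

With only a single lowerdir and no upperdir, the same call would now fail with the "at least 2 lowerdir" error added in ovl_fill_super().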
| @@ -21,7 +21,6 @@ | |||
| 21 | #include <linux/audit.h> | 21 | #include <linux/audit.h> |
| 22 | #include <linux/syscalls.h> | 22 | #include <linux/syscalls.h> |
| 23 | #include <linux/fcntl.h> | 23 | #include <linux/fcntl.h> |
| 24 | #include <linux/aio.h> | ||
| 25 | 24 | ||
| 26 | #include <asm/uaccess.h> | 25 | #include <asm/uaccess.h> |
| 27 | #include <asm/ioctls.h> | 26 | #include <asm/ioctls.h> |
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 956b75d61809..6dee68d013ff 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c | |||
| @@ -1325,6 +1325,9 @@ out: | |||
| 1325 | 1325 | ||
| 1326 | static int pagemap_open(struct inode *inode, struct file *file) | 1326 | static int pagemap_open(struct inode *inode, struct file *file) |
| 1327 | { | 1327 | { |
| 1328 | /* do not disclose physical addresses: attack vector */ | ||
| 1329 | if (!capable(CAP_SYS_ADMIN)) | ||
| 1330 | return -EPERM; | ||
| 1328 | pr_warn_once("Bits 55-60 of /proc/PID/pagemap entries are about " | 1331 | pr_warn_once("Bits 55-60 of /proc/PID/pagemap entries are about " |
| 1329 | "to stop being page-shift some time soon. See the " | 1332 | "to stop being page-shift some time soon. See the " |
| 1330 | "linux/Documentation/vm/pagemap.txt for details.\n"); | 1333 | "linux/Documentation/vm/pagemap.txt for details.\n"); |
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 39d1373128e9..44a549beeafa 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c | |||
| @@ -539,6 +539,9 @@ static int ramoops_probe(struct platform_device *pdev) | |||
| 539 | mem_address = pdata->mem_address; | 539 | mem_address = pdata->mem_address; |
| 540 | record_size = pdata->record_size; | 540 | record_size = pdata->record_size; |
| 541 | dump_oops = pdata->dump_oops; | 541 | dump_oops = pdata->dump_oops; |
| 542 | ramoops_console_size = pdata->console_size; | ||
| 543 | ramoops_pmsg_size = pdata->pmsg_size; | ||
| 544 | ramoops_ftrace_size = pdata->ftrace_size; | ||
| 542 | 545 | ||
| 543 | pr_info("attached 0x%lx@0x%llx, ecc: %d/%d\n", | 546 | pr_info("attached 0x%lx@0x%llx, ecc: %d/%d\n", |
| 544 | cxt->size, (unsigned long long)cxt->phys_addr, | 547 | cxt->size, (unsigned long long)cxt->phys_addr, |
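
The ramoops hunk copies the console, pmsg and ftrace region sizes from the platform data into the corresponding module parameters. For context, a board-file style sketch of how that platform data is typically filled in; the addresses and sizes below are made-up placeholders:

#include <linux/platform_device.h>
#include <linux/pstore_ram.h>

/* Region layout is illustrative only; real boards use reserved memory. */
static struct ramoops_platform_data example_ramoops_data = {
	.mem_address	= 0x8f000000,
	.mem_size	= 0x100000,
	.record_size	= 0x4000,
	.console_size	= 0x4000,
	.ftrace_size	= 0x4000,
	.pmsg_size	= 0x4000,
	.dump_oops	= 1,
};

static struct platform_device example_ramoops_dev = {
	.name	= "ramoops",
	.dev	= {
		.platform_data = &example_ramoops_data,
	},
};

Registering example_ramoops_dev with platform_device_register() is how these values reach ramoops_probe() above.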
diff --git a/fs/read_write.c b/fs/read_write.c index 8e1b68786d66..69128b378646 100644 --- a/fs/read_write.c +++ b/fs/read_write.c | |||
| @@ -9,7 +9,6 @@ | |||
| 9 | #include <linux/fcntl.h> | 9 | #include <linux/fcntl.h> |
| 10 | #include <linux/file.h> | 10 | #include <linux/file.h> |
| 11 | #include <linux/uio.h> | 11 | #include <linux/uio.h> |
| 12 | #include <linux/aio.h> | ||
| 13 | #include <linux/fsnotify.h> | 12 | #include <linux/fsnotify.h> |
| 14 | #include <linux/security.h> | 13 | #include <linux/security.h> |
| 15 | #include <linux/export.h> | 14 | #include <linux/export.h> |
| @@ -343,13 +342,10 @@ ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos) | |||
| 343 | 342 | ||
| 344 | init_sync_kiocb(&kiocb, file); | 343 | init_sync_kiocb(&kiocb, file); |
| 345 | kiocb.ki_pos = *ppos; | 344 | kiocb.ki_pos = *ppos; |
| 346 | kiocb.ki_nbytes = iov_iter_count(iter); | ||
| 347 | 345 | ||
| 348 | iter->type |= READ; | 346 | iter->type |= READ; |
| 349 | ret = file->f_op->read_iter(&kiocb, iter); | 347 | ret = file->f_op->read_iter(&kiocb, iter); |
| 350 | if (ret == -EIOCBQUEUED) | 348 | BUG_ON(ret == -EIOCBQUEUED); |
| 351 | ret = wait_on_sync_kiocb(&kiocb); | ||
| 352 | |||
| 353 | if (ret > 0) | 349 | if (ret > 0) |
| 354 | *ppos = kiocb.ki_pos; | 350 | *ppos = kiocb.ki_pos; |
| 355 | return ret; | 351 | return ret; |
| @@ -366,13 +362,10 @@ ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos) | |||
| 366 | 362 | ||
| 367 | init_sync_kiocb(&kiocb, file); | 363 | init_sync_kiocb(&kiocb, file); |
| 368 | kiocb.ki_pos = *ppos; | 364 | kiocb.ki_pos = *ppos; |
| 369 | kiocb.ki_nbytes = iov_iter_count(iter); | ||
| 370 | 365 | ||
| 371 | iter->type |= WRITE; | 366 | iter->type |= WRITE; |
| 372 | ret = file->f_op->write_iter(&kiocb, iter); | 367 | ret = file->f_op->write_iter(&kiocb, iter); |
| 373 | if (ret == -EIOCBQUEUED) | 368 | BUG_ON(ret == -EIOCBQUEUED); |
| 374 | ret = wait_on_sync_kiocb(&kiocb); | ||
| 375 | |||
| 376 | if (ret > 0) | 369 | if (ret > 0) |
| 377 | *ppos = kiocb.ki_pos; | 370 | *ppos = kiocb.ki_pos; |
| 378 | return ret; | 371 | return ret; |
| @@ -426,11 +419,9 @@ ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *pp | |||
| 426 | 419 | ||
| 427 | init_sync_kiocb(&kiocb, filp); | 420 | init_sync_kiocb(&kiocb, filp); |
| 428 | kiocb.ki_pos = *ppos; | 421 | kiocb.ki_pos = *ppos; |
| 429 | kiocb.ki_nbytes = len; | ||
| 430 | 422 | ||
| 431 | ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos); | 423 | ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos); |
| 432 | if (-EIOCBQUEUED == ret) | 424 | BUG_ON(ret == -EIOCBQUEUED); |
| 433 | ret = wait_on_sync_kiocb(&kiocb); | ||
| 434 | *ppos = kiocb.ki_pos; | 425 | *ppos = kiocb.ki_pos; |
| 435 | return ret; | 426 | return ret; |
| 436 | } | 427 | } |
| @@ -446,12 +437,10 @@ ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *p | |||
| 446 | 437 | ||
| 447 | init_sync_kiocb(&kiocb, filp); | 438 | init_sync_kiocb(&kiocb, filp); |
| 448 | kiocb.ki_pos = *ppos; | 439 | kiocb.ki_pos = *ppos; |
| 449 | kiocb.ki_nbytes = len; | ||
| 450 | iov_iter_init(&iter, READ, &iov, 1, len); | 440 | iov_iter_init(&iter, READ, &iov, 1, len); |
| 451 | 441 | ||
| 452 | ret = filp->f_op->read_iter(&kiocb, &iter); | 442 | ret = filp->f_op->read_iter(&kiocb, &iter); |
| 453 | if (-EIOCBQUEUED == ret) | 443 | BUG_ON(ret == -EIOCBQUEUED); |
| 454 | ret = wait_on_sync_kiocb(&kiocb); | ||
| 455 | *ppos = kiocb.ki_pos; | 444 | *ppos = kiocb.ki_pos; |
| 456 | return ret; | 445 | return ret; |
| 457 | } | 446 | } |
| @@ -510,11 +499,9 @@ ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, lof | |||
| 510 | 499 | ||
| 511 | init_sync_kiocb(&kiocb, filp); | 500 | init_sync_kiocb(&kiocb, filp); |
| 512 | kiocb.ki_pos = *ppos; | 501 | kiocb.ki_pos = *ppos; |
| 513 | kiocb.ki_nbytes = len; | ||
| 514 | 502 | ||
| 515 | ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos); | 503 | ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos); |
| 516 | if (-EIOCBQUEUED == ret) | 504 | BUG_ON(ret == -EIOCBQUEUED); |
| 517 | ret = wait_on_sync_kiocb(&kiocb); | ||
| 518 | *ppos = kiocb.ki_pos; | 505 | *ppos = kiocb.ki_pos; |
| 519 | return ret; | 506 | return ret; |
| 520 | } | 507 | } |
| @@ -530,12 +517,10 @@ ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, lo | |||
| 530 | 517 | ||
| 531 | init_sync_kiocb(&kiocb, filp); | 518 | init_sync_kiocb(&kiocb, filp); |
| 532 | kiocb.ki_pos = *ppos; | 519 | kiocb.ki_pos = *ppos; |
| 533 | kiocb.ki_nbytes = len; | ||
| 534 | iov_iter_init(&iter, WRITE, &iov, 1, len); | 520 | iov_iter_init(&iter, WRITE, &iov, 1, len); |
| 535 | 521 | ||
| 536 | ret = filp->f_op->write_iter(&kiocb, &iter); | 522 | ret = filp->f_op->write_iter(&kiocb, &iter); |
| 537 | if (-EIOCBQUEUED == ret) | 523 | BUG_ON(ret == -EIOCBQUEUED); |
| 538 | ret = wait_on_sync_kiocb(&kiocb); | ||
| 539 | *ppos = kiocb.ki_pos; | 524 | *ppos = kiocb.ki_pos; |
| 540 | return ret; | 525 | return ret; |
| 541 | } | 526 | } |
| @@ -710,60 +695,47 @@ unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to) | |||
| 710 | } | 695 | } |
| 711 | EXPORT_SYMBOL(iov_shorten); | 696 | EXPORT_SYMBOL(iov_shorten); |
| 712 | 697 | ||
| 713 | static ssize_t do_iter_readv_writev(struct file *filp, int rw, const struct iovec *iov, | 698 | static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter, |
| 714 | unsigned long nr_segs, size_t len, loff_t *ppos, iter_fn_t fn) | 699 | loff_t *ppos, iter_fn_t fn) |
| 715 | { | 700 | { |
| 716 | struct kiocb kiocb; | 701 | struct kiocb kiocb; |
| 717 | struct iov_iter iter; | ||
| 718 | ssize_t ret; | 702 | ssize_t ret; |
| 719 | 703 | ||
| 720 | init_sync_kiocb(&kiocb, filp); | 704 | init_sync_kiocb(&kiocb, filp); |
| 721 | kiocb.ki_pos = *ppos; | 705 | kiocb.ki_pos = *ppos; |
| 722 | kiocb.ki_nbytes = len; | ||
| 723 | 706 | ||
| 724 | iov_iter_init(&iter, rw, iov, nr_segs, len); | 707 | ret = fn(&kiocb, iter); |
| 725 | ret = fn(&kiocb, &iter); | 708 | BUG_ON(ret == -EIOCBQUEUED); |
| 726 | if (ret == -EIOCBQUEUED) | ||
| 727 | ret = wait_on_sync_kiocb(&kiocb); | ||
| 728 | *ppos = kiocb.ki_pos; | 709 | *ppos = kiocb.ki_pos; |
| 729 | return ret; | 710 | return ret; |
| 730 | } | 711 | } |
| 731 | 712 | ||
| 732 | static ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov, | 713 | static ssize_t do_sync_readv_writev(struct file *filp, struct iov_iter *iter, |
| 733 | unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn) | 714 | loff_t *ppos, iov_fn_t fn) |
| 734 | { | 715 | { |
| 735 | struct kiocb kiocb; | 716 | struct kiocb kiocb; |
| 736 | ssize_t ret; | 717 | ssize_t ret; |
| 737 | 718 | ||
| 738 | init_sync_kiocb(&kiocb, filp); | 719 | init_sync_kiocb(&kiocb, filp); |
| 739 | kiocb.ki_pos = *ppos; | 720 | kiocb.ki_pos = *ppos; |
| 740 | kiocb.ki_nbytes = len; | ||
| 741 | 721 | ||
| 742 | ret = fn(&kiocb, iov, nr_segs, kiocb.ki_pos); | 722 | ret = fn(&kiocb, iter->iov, iter->nr_segs, kiocb.ki_pos); |
| 743 | if (ret == -EIOCBQUEUED) | 723 | BUG_ON(ret == -EIOCBQUEUED); |
| 744 | ret = wait_on_sync_kiocb(&kiocb); | ||
| 745 | *ppos = kiocb.ki_pos; | 724 | *ppos = kiocb.ki_pos; |
| 746 | return ret; | 725 | return ret; |
| 747 | } | 726 | } |
| 748 | 727 | ||
| 749 | /* Do it by hand, with file-ops */ | 728 | /* Do it by hand, with file-ops */ |
| 750 | static ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov, | 729 | static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter, |
| 751 | unsigned long nr_segs, loff_t *ppos, io_fn_t fn) | 730 | loff_t *ppos, io_fn_t fn) |
| 752 | { | 731 | { |
| 753 | struct iovec *vector = iov; | ||
| 754 | ssize_t ret = 0; | 732 | ssize_t ret = 0; |
| 755 | 733 | ||
| 756 | while (nr_segs > 0) { | 734 | while (iov_iter_count(iter)) { |
| 757 | void __user *base; | 735 | struct iovec iovec = iov_iter_iovec(iter); |
| 758 | size_t len; | ||
| 759 | ssize_t nr; | 736 | ssize_t nr; |
| 760 | 737 | ||
| 761 | base = vector->iov_base; | 738 | nr = fn(filp, iovec.iov_base, iovec.iov_len, ppos); |
| 762 | len = vector->iov_len; | ||
| 763 | vector++; | ||
| 764 | nr_segs--; | ||
| 765 | |||
| 766 | nr = fn(filp, base, len, ppos); | ||
| 767 | 739 | ||
| 768 | if (nr < 0) { | 740 | if (nr < 0) { |
| 769 | if (!ret) | 741 | if (!ret) |
| @@ -771,8 +743,9 @@ static ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov, | |||
| 771 | break; | 743 | break; |
| 772 | } | 744 | } |
| 773 | ret += nr; | 745 | ret += nr; |
| 774 | if (nr != len) | 746 | if (nr != iovec.iov_len) |
| 775 | break; | 747 | break; |
| 748 | iov_iter_advance(iter, nr); | ||
| 776 | } | 749 | } |
| 777 | 750 | ||
| 778 | return ret; | 751 | return ret; |
| @@ -863,17 +836,20 @@ static ssize_t do_readv_writev(int type, struct file *file, | |||
| 863 | size_t tot_len; | 836 | size_t tot_len; |
| 864 | struct iovec iovstack[UIO_FASTIOV]; | 837 | struct iovec iovstack[UIO_FASTIOV]; |
| 865 | struct iovec *iov = iovstack; | 838 | struct iovec *iov = iovstack; |
| 839 | struct iov_iter iter; | ||
| 866 | ssize_t ret; | 840 | ssize_t ret; |
| 867 | io_fn_t fn; | 841 | io_fn_t fn; |
| 868 | iov_fn_t fnv; | 842 | iov_fn_t fnv; |
| 869 | iter_fn_t iter_fn; | 843 | iter_fn_t iter_fn; |
| 870 | 844 | ||
| 871 | ret = rw_copy_check_uvector(type, uvector, nr_segs, | 845 | ret = import_iovec(type, uvector, nr_segs, |
| 872 | ARRAY_SIZE(iovstack), iovstack, &iov); | 846 | ARRAY_SIZE(iovstack), &iov, &iter); |
| 873 | if (ret <= 0) | 847 | if (ret < 0) |
| 874 | goto out; | 848 | return ret; |
| 875 | 849 | ||
| 876 | tot_len = ret; | 850 | tot_len = iov_iter_count(&iter); |
| 851 | if (!tot_len) | ||
| 852 | goto out; | ||
| 877 | ret = rw_verify_area(type, file, pos, tot_len); | 853 | ret = rw_verify_area(type, file, pos, tot_len); |
| 878 | if (ret < 0) | 854 | if (ret < 0) |
| 879 | goto out; | 855 | goto out; |
| @@ -891,20 +867,17 @@ static ssize_t do_readv_writev(int type, struct file *file, | |||
| 891 | } | 867 | } |
| 892 | 868 | ||
| 893 | if (iter_fn) | 869 | if (iter_fn) |
| 894 | ret = do_iter_readv_writev(file, type, iov, nr_segs, tot_len, | 870 | ret = do_iter_readv_writev(file, &iter, pos, iter_fn); |
| 895 | pos, iter_fn); | ||
| 896 | else if (fnv) | 871 | else if (fnv) |
| 897 | ret = do_sync_readv_writev(file, iov, nr_segs, tot_len, | 872 | ret = do_sync_readv_writev(file, &iter, pos, fnv); |
| 898 | pos, fnv); | ||
| 899 | else | 873 | else |
| 900 | ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn); | 874 | ret = do_loop_readv_writev(file, &iter, pos, fn); |
| 901 | 875 | ||
| 902 | if (type != READ) | 876 | if (type != READ) |
| 903 | file_end_write(file); | 877 | file_end_write(file); |
| 904 | 878 | ||
| 905 | out: | 879 | out: |
| 906 | if (iov != iovstack) | 880 | kfree(iov); |
| 907 | kfree(iov); | ||
| 908 | if ((ret + (type == READ)) > 0) { | 881 | if ((ret + (type == READ)) > 0) { |
| 909 | if (type == READ) | 882 | if (type == READ) |
| 910 | fsnotify_access(file); | 883 | fsnotify_access(file); |
| @@ -1043,17 +1016,20 @@ static ssize_t compat_do_readv_writev(int type, struct file *file, | |||
| 1043 | compat_ssize_t tot_len; | 1016 | compat_ssize_t tot_len; |
| 1044 | struct iovec iovstack[UIO_FASTIOV]; | 1017 | struct iovec iovstack[UIO_FASTIOV]; |
| 1045 | struct iovec *iov = iovstack; | 1018 | struct iovec *iov = iovstack; |
| 1019 | struct iov_iter iter; | ||
| 1046 | ssize_t ret; | 1020 | ssize_t ret; |
| 1047 | io_fn_t fn; | 1021 | io_fn_t fn; |
| 1048 | iov_fn_t fnv; | 1022 | iov_fn_t fnv; |
| 1049 | iter_fn_t iter_fn; | 1023 | iter_fn_t iter_fn; |
| 1050 | 1024 | ||
| 1051 | ret = compat_rw_copy_check_uvector(type, uvector, nr_segs, | 1025 | ret = compat_import_iovec(type, uvector, nr_segs, |
| 1052 | UIO_FASTIOV, iovstack, &iov); | 1026 | UIO_FASTIOV, &iov, &iter); |
| 1053 | if (ret <= 0) | 1027 | if (ret < 0) |
| 1054 | goto out; | 1028 | return ret; |
| 1055 | 1029 | ||
| 1056 | tot_len = ret; | 1030 | tot_len = iov_iter_count(&iter); |
| 1031 | if (!tot_len) | ||
| 1032 | goto out; | ||
| 1057 | ret = rw_verify_area(type, file, pos, tot_len); | 1033 | ret = rw_verify_area(type, file, pos, tot_len); |
| 1058 | if (ret < 0) | 1034 | if (ret < 0) |
| 1059 | goto out; | 1035 | goto out; |
| @@ -1071,20 +1047,17 @@ static ssize_t compat_do_readv_writev(int type, struct file *file, | |||
| 1071 | } | 1047 | } |
| 1072 | 1048 | ||
| 1073 | if (iter_fn) | 1049 | if (iter_fn) |
| 1074 | ret = do_iter_readv_writev(file, type, iov, nr_segs, tot_len, | 1050 | ret = do_iter_readv_writev(file, &iter, pos, iter_fn); |
| 1075 | pos, iter_fn); | ||
| 1076 | else if (fnv) | 1051 | else if (fnv) |
| 1077 | ret = do_sync_readv_writev(file, iov, nr_segs, tot_len, | 1052 | ret = do_sync_readv_writev(file, &iter, pos, fnv); |
| 1078 | pos, fnv); | ||
| 1079 | else | 1053 | else |
| 1080 | ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn); | 1054 | ret = do_loop_readv_writev(file, &iter, pos, fn); |
| 1081 | 1055 | ||
| 1082 | if (type != READ) | 1056 | if (type != READ) |
| 1083 | file_end_write(file); | 1057 | file_end_write(file); |
| 1084 | 1058 | ||
| 1085 | out: | 1059 | out: |
| 1086 | if (iov != iovstack) | 1060 | kfree(iov); |
| 1087 | kfree(iov); | ||
| 1088 | if ((ret + (type == READ)) > 0) { | 1061 | if ((ret + (type == READ)) > 0) { |
| 1089 | if (type == READ) | 1062 | if (type == READ) |
| 1090 | fsnotify_access(file); | 1063 | fsnotify_access(file); |
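
The rewritten do_readv_writev()/compat_do_readv_writev() above follow a single pattern: import_iovec() copies the user iovec (into the on-stack fast array or a heap allocation) and initializes the iov_iter, and on success leaves *iov either NULL or pointing at the heap copy, so one unconditional kfree(iov) covers every exit path. A condensed sketch of that pattern, using only helpers visible in this diff (the example_readv name is illustrative):

#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/uio.h>

static ssize_t example_readv(struct file *file,
			     const struct iovec __user *uvector,
			     unsigned long nr_segs, loff_t *pos)
{
	struct iovec iovstack[UIO_FASTIOV];
	struct iovec *iov = iovstack;
	struct iov_iter iter;
	ssize_t ret;

	/* Copy the user iovec and initialize @iter in one step.  On
	 * failure nothing needs freeing; on success *iov is NULL (the
	 * stack array was enough) or points at a kmalloc'd copy. */
	ret = import_iovec(READ, uvector, nr_segs,
			   ARRAY_SIZE(iovstack), &iov, &iter);
	if (ret < 0)
		return ret;

	if (iov_iter_count(&iter))
		ret = vfs_iter_read(file, &iter, pos);
	else
		ret = 0;

	kfree(iov);		/* kfree(NULL) is a no-op, so always safe */
	return ret;
}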
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index e72401e1f995..9312b7842e03 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c | |||
| @@ -18,7 +18,7 @@ | |||
| 18 | #include <linux/writeback.h> | 18 | #include <linux/writeback.h> |
| 19 | #include <linux/quotaops.h> | 19 | #include <linux/quotaops.h> |
| 20 | #include <linux/swap.h> | 20 | #include <linux/swap.h> |
| 21 | #include <linux/aio.h> | 21 | #include <linux/uio.h> |
| 22 | 22 | ||
| 23 | int reiserfs_commit_write(struct file *f, struct page *page, | 23 | int reiserfs_commit_write(struct file *f, struct page *page, |
| 24 | unsigned from, unsigned to); | 24 | unsigned from, unsigned to); |
diff --git a/fs/splice.c b/fs/splice.c index 7968da96bebb..41cbb16299e0 100644 --- a/fs/splice.c +++ b/fs/splice.c | |||
| @@ -32,7 +32,6 @@ | |||
| 32 | #include <linux/gfp.h> | 32 | #include <linux/gfp.h> |
| 33 | #include <linux/socket.h> | 33 | #include <linux/socket.h> |
| 34 | #include <linux/compat.h> | 34 | #include <linux/compat.h> |
| 35 | #include <linux/aio.h> | ||
| 36 | #include "internal.h" | 35 | #include "internal.h" |
| 37 | 36 | ||
| 38 | /* | 37 | /* |
| @@ -1534,34 +1533,29 @@ static long vmsplice_to_user(struct file *file, const struct iovec __user *uiov, | |||
| 1534 | struct iovec iovstack[UIO_FASTIOV]; | 1533 | struct iovec iovstack[UIO_FASTIOV]; |
| 1535 | struct iovec *iov = iovstack; | 1534 | struct iovec *iov = iovstack; |
| 1536 | struct iov_iter iter; | 1535 | struct iov_iter iter; |
| 1537 | ssize_t count; | ||
| 1538 | 1536 | ||
| 1539 | pipe = get_pipe_info(file); | 1537 | pipe = get_pipe_info(file); |
| 1540 | if (!pipe) | 1538 | if (!pipe) |
| 1541 | return -EBADF; | 1539 | return -EBADF; |
| 1542 | 1540 | ||
| 1543 | ret = rw_copy_check_uvector(READ, uiov, nr_segs, | 1541 | ret = import_iovec(READ, uiov, nr_segs, |
| 1544 | ARRAY_SIZE(iovstack), iovstack, &iov); | 1542 | ARRAY_SIZE(iovstack), &iov, &iter); |
| 1545 | if (ret <= 0) | 1543 | if (ret < 0) |
| 1546 | goto out; | 1544 | return ret; |
| 1547 | |||
| 1548 | count = ret; | ||
| 1549 | iov_iter_init(&iter, READ, iov, nr_segs, count); | ||
| 1550 | 1545 | ||
| 1546 | sd.total_len = iov_iter_count(&iter); | ||
| 1551 | sd.len = 0; | 1547 | sd.len = 0; |
| 1552 | sd.total_len = count; | ||
| 1553 | sd.flags = flags; | 1548 | sd.flags = flags; |
| 1554 | sd.u.data = &iter; | 1549 | sd.u.data = &iter; |
| 1555 | sd.pos = 0; | 1550 | sd.pos = 0; |
| 1556 | 1551 | ||
| 1557 | pipe_lock(pipe); | 1552 | if (sd.total_len) { |
| 1558 | ret = __splice_from_pipe(pipe, &sd, pipe_to_user); | 1553 | pipe_lock(pipe); |
| 1559 | pipe_unlock(pipe); | 1554 | ret = __splice_from_pipe(pipe, &sd, pipe_to_user); |
| 1560 | 1555 | pipe_unlock(pipe); | |
| 1561 | out: | 1556 | } |
| 1562 | if (iov != iovstack) | ||
| 1563 | kfree(iov); | ||
| 1564 | 1557 | ||
| 1558 | kfree(iov); | ||
| 1565 | return ret; | 1559 | return ret; |
| 1566 | } | 1560 | } |
| 1567 | 1561 | ||
| @@ -66,7 +66,7 @@ int vfs_getattr(struct path *path, struct kstat *stat) | |||
| 66 | { | 66 | { |
| 67 | int retval; | 67 | int retval; |
| 68 | 68 | ||
| 69 | retval = security_inode_getattr(path->mnt, path->dentry); | 69 | retval = security_inode_getattr(path); |
| 70 | if (retval) | 70 | if (retval) |
| 71 | return retval; | 71 | return retval; |
| 72 | return vfs_getattr_nosec(path, stat); | 72 | return vfs_getattr_nosec(path, stat); |
diff --git a/fs/super.c b/fs/super.c index 2b7dc90ccdbb..928c20f47af9 100644 --- a/fs/super.c +++ b/fs/super.c | |||
| @@ -224,7 +224,7 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags) | |||
| 224 | s->s_maxbytes = MAX_NON_LFS; | 224 | s->s_maxbytes = MAX_NON_LFS; |
| 225 | s->s_op = &default_op; | 225 | s->s_op = &default_op; |
| 226 | s->s_time_gran = 1000000000; | 226 | s->s_time_gran = 1000000000; |
| 227 | s->cleancache_poolid = -1; | 227 | s->cleancache_poolid = CLEANCACHE_NO_POOL; |
| 228 | 228 | ||
| 229 | s->s_shrink.seeks = DEFAULT_SEEKS; | 229 | s->s_shrink.seeks = DEFAULT_SEEKS; |
| 230 | s->s_shrink.scan_objects = super_cache_scan; | 230 | s->s_shrink.scan_objects = super_cache_scan; |
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c index 2554d8835b48..b400c04371f0 100644 --- a/fs/sysfs/group.c +++ b/fs/sysfs/group.c | |||
| @@ -41,7 +41,7 @@ static int create_files(struct kernfs_node *parent, struct kobject *kobj, | |||
| 41 | 41 | ||
| 42 | if (grp->attrs) { | 42 | if (grp->attrs) { |
| 43 | for (i = 0, attr = grp->attrs; *attr && !error; i++, attr++) { | 43 | for (i = 0, attr = grp->attrs; *attr && !error; i++, attr++) { |
| 44 | umode_t mode = 0; | 44 | umode_t mode = (*attr)->mode; |
| 45 | 45 | ||
| 46 | /* | 46 | /* |
| 47 | * In update mode, we're changing the permissions or | 47 | * In update mode, we're changing the permissions or |
| @@ -55,9 +55,14 @@ static int create_files(struct kernfs_node *parent, struct kobject *kobj, | |||
| 55 | if (!mode) | 55 | if (!mode) |
| 56 | continue; | 56 | continue; |
| 57 | } | 57 | } |
| 58 | |||
| 59 | WARN(mode & ~(SYSFS_PREALLOC | 0664), | ||
| 60 | "Attribute %s: Invalid permissions 0%o\n", | ||
| 61 | (*attr)->name, mode); | ||
| 62 | |||
| 63 | mode &= SYSFS_PREALLOC | 0664; | ||
| 58 | error = sysfs_add_file_mode_ns(parent, *attr, false, | 64 | error = sysfs_add_file_mode_ns(parent, *attr, false, |
| 59 | (*attr)->mode | mode, | 65 | mode, NULL); |
| 60 | NULL); | ||
| 61 | if (unlikely(error)) | 66 | if (unlikely(error)) |
| 62 | break; | 67 | break; |
| 63 | } | 68 | } |
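
The sysfs change takes the attribute's own mode as the starting point and warns on, then masks off, anything outside SYSFS_PREALLOC | 0664, so group attributes cannot be world-writable or carry exec bits. A hedged sketch of an attribute definition that stays inside that mask (all names are illustrative):

#include <linux/kernel.h>
#include <linux/kobject.h>
#include <linux/sysfs.h>

static ssize_t foo_show(struct kobject *kobj, struct kobj_attribute *attr,
			char *buf)
{
	return sprintf(buf, "example\n");
}

/* 0444 fits inside the SYSFS_PREALLOC | 0664 mask; a mode carrying exec
 * bits or world-write would now trigger the WARN and be stripped. */
static struct kobj_attribute foo_attr = __ATTR(foo, 0444, foo_show, NULL);

static struct attribute *foo_attrs[] = {
	&foo_attr.attr,
	NULL,
};

static const struct attribute_group foo_group = {
	.attrs = foo_attrs,
};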
diff --git a/fs/tracefs/Makefile b/fs/tracefs/Makefile new file mode 100644 index 000000000000..82fa35b656c4 --- /dev/null +++ b/fs/tracefs/Makefile | |||
| @@ -0,0 +1,4 @@ | |||
| 1 | tracefs-objs := inode.o | ||
| 2 | |||
| 3 | obj-$(CONFIG_TRACING) += tracefs.o | ||
| 4 | |||
diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c new file mode 100644 index 000000000000..d92bdf3b079a --- /dev/null +++ b/fs/tracefs/inode.c | |||
| @@ -0,0 +1,650 @@ | |||
| 1 | /* | ||
| 2 | * inode.c - part of tracefs, a pseudo file system for activating tracing | ||
| 3 | * | ||
| 4 | * Based on debugfs by: Greg Kroah-Hartman <greg@kroah.com> | ||
| 5 | * | ||
| 6 | * Copyright (C) 2014 Red Hat Inc, author: Steven Rostedt <srostedt@redhat.com> | ||
| 7 | * | ||
| 8 | * This program is free software; you can redistribute it and/or | ||
| 9 | * modify it under the terms of the GNU General Public License version | ||
| 10 | * 2 as published by the Free Software Foundation. | ||
| 11 | * | ||
| 12 | * tracefs is the file system that is used by the tracing infrastructure. | ||
| 13 | * | ||
| 14 | */ | ||
| 15 | |||
| 16 | #include <linux/module.h> | ||
| 17 | #include <linux/fs.h> | ||
| 18 | #include <linux/mount.h> | ||
| 19 | #include <linux/kobject.h> | ||
| 20 | #include <linux/namei.h> | ||
| 21 | #include <linux/tracefs.h> | ||
| 22 | #include <linux/fsnotify.h> | ||
| 23 | #include <linux/seq_file.h> | ||
| 24 | #include <linux/parser.h> | ||
| 25 | #include <linux/magic.h> | ||
| 26 | #include <linux/slab.h> | ||
| 27 | |||
| 28 | #define TRACEFS_DEFAULT_MODE 0700 | ||
| 29 | |||
| 30 | static struct vfsmount *tracefs_mount; | ||
| 31 | static int tracefs_mount_count; | ||
| 32 | static bool tracefs_registered; | ||
| 33 | |||
| 34 | static ssize_t default_read_file(struct file *file, char __user *buf, | ||
| 35 | size_t count, loff_t *ppos) | ||
| 36 | { | ||
| 37 | return 0; | ||
| 38 | } | ||
| 39 | |||
| 40 | static ssize_t default_write_file(struct file *file, const char __user *buf, | ||
| 41 | size_t count, loff_t *ppos) | ||
| 42 | { | ||
| 43 | return count; | ||
| 44 | } | ||
| 45 | |||
| 46 | static const struct file_operations tracefs_file_operations = { | ||
| 47 | .read = default_read_file, | ||
| 48 | .write = default_write_file, | ||
| 49 | .open = simple_open, | ||
| 50 | .llseek = noop_llseek, | ||
| 51 | }; | ||
| 52 | |||
| 53 | static struct tracefs_dir_ops { | ||
| 54 | int (*mkdir)(const char *name); | ||
| 55 | int (*rmdir)(const char *name); | ||
| 56 | } tracefs_ops; | ||
| 57 | |||
| 58 | static char *get_dname(struct dentry *dentry) | ||
| 59 | { | ||
| 60 | const char *dname; | ||
| 61 | char *name; | ||
| 62 | int len = dentry->d_name.len; | ||
| 63 | |||
| 64 | dname = dentry->d_name.name; | ||
| 65 | name = kmalloc(len + 1, GFP_KERNEL); | ||
| 66 | if (!name) | ||
| 67 | return NULL; | ||
| 68 | memcpy(name, dname, len); | ||
| 69 | name[len] = 0; | ||
| 70 | return name; | ||
| 71 | } | ||
| 72 | |||
| 73 | static int tracefs_syscall_mkdir(struct inode *inode, struct dentry *dentry, umode_t mode) | ||
| 74 | { | ||
| 75 | char *name; | ||
| 76 | int ret; | ||
| 77 | |||
| 78 | name = get_dname(dentry); | ||
| 79 | if (!name) | ||
| 80 | return -ENOMEM; | ||
| 81 | |||
| 82 | /* | ||
| 83 | * The mkdir call can call the generic functions that create | ||
| 84 | * the files within the tracefs system. It is up to the individual | ||
| 85 | * mkdir routine to handle races. | ||
| 86 | */ | ||
| 87 | mutex_unlock(&inode->i_mutex); | ||
| 88 | ret = tracefs_ops.mkdir(name); | ||
| 89 | mutex_lock(&inode->i_mutex); | ||
| 90 | |||
| 91 | kfree(name); | ||
| 92 | |||
| 93 | return ret; | ||
| 94 | } | ||
| 95 | |||
| 96 | static int tracefs_syscall_rmdir(struct inode *inode, struct dentry *dentry) | ||
| 97 | { | ||
| 98 | char *name; | ||
| 99 | int ret; | ||
| 100 | |||
| 101 | name = get_dname(dentry); | ||
| 102 | if (!name) | ||
| 103 | return -ENOMEM; | ||
| 104 | |||
| 105 | /* | ||
| 106 | * The rmdir call can call the generic functions that create | ||
| 107 | * the files within the tracefs system. It is up to the individual | ||
| 108 | * rmdir routine to handle races. | ||
| 109 | * This time we need to unlock not only the parent (inode) but | ||
| 110 | * also the directory that is being deleted. | ||
| 111 | */ | ||
| 112 | mutex_unlock(&inode->i_mutex); | ||
| 113 | mutex_unlock(&dentry->d_inode->i_mutex); | ||
| 114 | |||
| 115 | ret = tracefs_ops.rmdir(name); | ||
| 116 | |||
| 117 | mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT); | ||
| 118 | mutex_lock(&dentry->d_inode->i_mutex); | ||
| 119 | |||
| 120 | kfree(name); | ||
| 121 | |||
| 122 | return ret; | ||
| 123 | } | ||
| 124 | |||
| 125 | static const struct inode_operations tracefs_dir_inode_operations = { | ||
| 126 | .lookup = simple_lookup, | ||
| 127 | .mkdir = tracefs_syscall_mkdir, | ||
| 128 | .rmdir = tracefs_syscall_rmdir, | ||
| 129 | }; | ||
| 130 | |||
| 131 | static struct inode *tracefs_get_inode(struct super_block *sb) | ||
| 132 | { | ||
| 133 | struct inode *inode = new_inode(sb); | ||
| 134 | if (inode) { | ||
| 135 | inode->i_ino = get_next_ino(); | ||
| 136 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; | ||
| 137 | } | ||
| 138 | return inode; | ||
| 139 | } | ||
| 140 | |||
| 141 | struct tracefs_mount_opts { | ||
| 142 | kuid_t uid; | ||
| 143 | kgid_t gid; | ||
| 144 | umode_t mode; | ||
| 145 | }; | ||
| 146 | |||
| 147 | enum { | ||
| 148 | Opt_uid, | ||
| 149 | Opt_gid, | ||
| 150 | Opt_mode, | ||
| 151 | Opt_err | ||
| 152 | }; | ||
| 153 | |||
| 154 | static const match_table_t tokens = { | ||
| 155 | {Opt_uid, "uid=%u"}, | ||
| 156 | {Opt_gid, "gid=%u"}, | ||
| 157 | {Opt_mode, "mode=%o"}, | ||
| 158 | {Opt_err, NULL} | ||
| 159 | }; | ||
| 160 | |||
| 161 | struct tracefs_fs_info { | ||
| 162 | struct tracefs_mount_opts mount_opts; | ||
| 163 | }; | ||
| 164 | |||
| 165 | static int tracefs_parse_options(char *data, struct tracefs_mount_opts *opts) | ||
| 166 | { | ||
| 167 | substring_t args[MAX_OPT_ARGS]; | ||
| 168 | int option; | ||
| 169 | int token; | ||
| 170 | kuid_t uid; | ||
| 171 | kgid_t gid; | ||
| 172 | char *p; | ||
| 173 | |||
| 174 | opts->mode = TRACEFS_DEFAULT_MODE; | ||
| 175 | |||
| 176 | while ((p = strsep(&data, ",")) != NULL) { | ||
| 177 | if (!*p) | ||
| 178 | continue; | ||
| 179 | |||
| 180 | token = match_token(p, tokens, args); | ||
| 181 | switch (token) { | ||
| 182 | case Opt_uid: | ||
| 183 | if (match_int(&args[0], &option)) | ||
| 184 | return -EINVAL; | ||
| 185 | uid = make_kuid(current_user_ns(), option); | ||
| 186 | if (!uid_valid(uid)) | ||
| 187 | return -EINVAL; | ||
| 188 | opts->uid = uid; | ||
| 189 | break; | ||
| 190 | case Opt_gid: | ||
| 191 | if (match_int(&args[0], &option)) | ||
| 192 | return -EINVAL; | ||
| 193 | gid = make_kgid(current_user_ns(), option); | ||
| 194 | if (!gid_valid(gid)) | ||
| 195 | return -EINVAL; | ||
| 196 | opts->gid = gid; | ||
| 197 | break; | ||
| 198 | case Opt_mode: | ||
| 199 | if (match_octal(&args[0], &option)) | ||
| 200 | return -EINVAL; | ||
| 201 | opts->mode = option & S_IALLUGO; | ||
| 202 | break; | ||
| 203 | /* | ||
| 204 | * We might like to report bad mount options here; | ||
| 205 | * but traditionally tracefs has ignored all mount options | ||
| 206 | */ | ||
| 207 | } | ||
| 208 | } | ||
| 209 | |||
| 210 | return 0; | ||
| 211 | } | ||
| 212 | |||
| 213 | static int tracefs_apply_options(struct super_block *sb) | ||
| 214 | { | ||
| 215 | struct tracefs_fs_info *fsi = sb->s_fs_info; | ||
| 216 | struct inode *inode = sb->s_root->d_inode; | ||
| 217 | struct tracefs_mount_opts *opts = &fsi->mount_opts; | ||
| 218 | |||
| 219 | inode->i_mode &= ~S_IALLUGO; | ||
| 220 | inode->i_mode |= opts->mode; | ||
| 221 | |||
| 222 | inode->i_uid = opts->uid; | ||
| 223 | inode->i_gid = opts->gid; | ||
| 224 | |||
| 225 | return 0; | ||
| 226 | } | ||
| 227 | |||
| 228 | static int tracefs_remount(struct super_block *sb, int *flags, char *data) | ||
| 229 | { | ||
| 230 | int err; | ||
| 231 | struct tracefs_fs_info *fsi = sb->s_fs_info; | ||
| 232 | |||
| 233 | sync_filesystem(sb); | ||
| 234 | err = tracefs_parse_options(data, &fsi->mount_opts); | ||
| 235 | if (err) | ||
| 236 | goto fail; | ||
| 237 | |||
| 238 | tracefs_apply_options(sb); | ||
| 239 | |||
| 240 | fail: | ||
| 241 | return err; | ||
| 242 | } | ||
| 243 | |||
| 244 | static int tracefs_show_options(struct seq_file *m, struct dentry *root) | ||
| 245 | { | ||
| 246 | struct tracefs_fs_info *fsi = root->d_sb->s_fs_info; | ||
| 247 | struct tracefs_mount_opts *opts = &fsi->mount_opts; | ||
| 248 | |||
| 249 | if (!uid_eq(opts->uid, GLOBAL_ROOT_UID)) | ||
| 250 | seq_printf(m, ",uid=%u", | ||
| 251 | from_kuid_munged(&init_user_ns, opts->uid)); | ||
| 252 | if (!gid_eq(opts->gid, GLOBAL_ROOT_GID)) | ||
| 253 | seq_printf(m, ",gid=%u", | ||
| 254 | from_kgid_munged(&init_user_ns, opts->gid)); | ||
| 255 | if (opts->mode != TRACEFS_DEFAULT_MODE) | ||
| 256 | seq_printf(m, ",mode=%o", opts->mode); | ||
| 257 | |||
| 258 | return 0; | ||
| 259 | } | ||
| 260 | |||
| 261 | static const struct super_operations tracefs_super_operations = { | ||
| 262 | .statfs = simple_statfs, | ||
| 263 | .remount_fs = tracefs_remount, | ||
| 264 | .show_options = tracefs_show_options, | ||
| 265 | }; | ||
| 266 | |||
| 267 | static int trace_fill_super(struct super_block *sb, void *data, int silent) | ||
| 268 | { | ||
| 269 | static struct tree_descr trace_files[] = {{""}}; | ||
| 270 | struct tracefs_fs_info *fsi; | ||
| 271 | int err; | ||
| 272 | |||
| 273 | save_mount_options(sb, data); | ||
| 274 | |||
| 275 | fsi = kzalloc(sizeof(struct tracefs_fs_info), GFP_KERNEL); | ||
| 276 | sb->s_fs_info = fsi; | ||
| 277 | if (!fsi) { | ||
| 278 | err = -ENOMEM; | ||
| 279 | goto fail; | ||
| 280 | } | ||
| 281 | |||
| 282 | err = tracefs_parse_options(data, &fsi->mount_opts); | ||
| 283 | if (err) | ||
| 284 | goto fail; | ||
| 285 | |||
| 286 | err = simple_fill_super(sb, TRACEFS_MAGIC, trace_files); | ||
| 287 | if (err) | ||
| 288 | goto fail; | ||
| 289 | |||
| 290 | sb->s_op = &tracefs_super_operations; | ||
| 291 | |||
| 292 | tracefs_apply_options(sb); | ||
| 293 | |||
| 294 | return 0; | ||
| 295 | |||
| 296 | fail: | ||
| 297 | kfree(fsi); | ||
| 298 | sb->s_fs_info = NULL; | ||
| 299 | return err; | ||
| 300 | } | ||
| 301 | |||
| 302 | static struct dentry *trace_mount(struct file_system_type *fs_type, | ||
| 303 | int flags, const char *dev_name, | ||
| 304 | void *data) | ||
| 305 | { | ||
| 306 | return mount_single(fs_type, flags, data, trace_fill_super); | ||
| 307 | } | ||
| 308 | |||
| 309 | static struct file_system_type trace_fs_type = { | ||
| 310 | .owner = THIS_MODULE, | ||
| 311 | .name = "tracefs", | ||
| 312 | .mount = trace_mount, | ||
| 313 | .kill_sb = kill_litter_super, | ||
| 314 | }; | ||
| 315 | MODULE_ALIAS_FS("tracefs"); | ||
| 316 | |||
| 317 | static struct dentry *start_creating(const char *name, struct dentry *parent) | ||
| 318 | { | ||
| 319 | struct dentry *dentry; | ||
| 320 | int error; | ||
| 321 | |||
| 322 | pr_debug("tracefs: creating file '%s'\n",name); | ||
| 323 | |||
| 324 | error = simple_pin_fs(&trace_fs_type, &tracefs_mount, | ||
| 325 | &tracefs_mount_count); | ||
| 326 | if (error) | ||
| 327 | return ERR_PTR(error); | ||
| 328 | |||
| 329 | /* If the parent is not specified, we create it in the root. | ||
| 330 | * We need the root dentry to do this, which is in the super | ||
| 331 | * block. A pointer to that is in the struct vfsmount that we | ||
| 332 | * have around. | ||
| 333 | */ | ||
| 334 | if (!parent) | ||
| 335 | parent = tracefs_mount->mnt_root; | ||
| 336 | |||
| 337 | mutex_lock(&parent->d_inode->i_mutex); | ||
| 338 | dentry = lookup_one_len(name, parent, strlen(name)); | ||
| 339 | if (!IS_ERR(dentry) && dentry->d_inode) { | ||
| 340 | dput(dentry); | ||
| 341 | dentry = ERR_PTR(-EEXIST); | ||
| 342 | } | ||
| 343 | if (IS_ERR(dentry)) | ||
| 344 | mutex_unlock(&parent->d_inode->i_mutex); | ||
| 345 | return dentry; | ||
| 346 | } | ||
| 347 | |||
| 348 | static struct dentry *failed_creating(struct dentry *dentry) | ||
| 349 | { | ||
| 350 | mutex_unlock(&dentry->d_parent->d_inode->i_mutex); | ||
| 351 | dput(dentry); | ||
| 352 | simple_release_fs(&tracefs_mount, &tracefs_mount_count); | ||
| 353 | return NULL; | ||
| 354 | } | ||
| 355 | |||
| 356 | static struct dentry *end_creating(struct dentry *dentry) | ||
| 357 | { | ||
| 358 | mutex_unlock(&dentry->d_parent->d_inode->i_mutex); | ||
| 359 | return dentry; | ||
| 360 | } | ||
| 361 | |||
| 362 | /** | ||
| 363 | * tracefs_create_file - create a file in the tracefs filesystem | ||
| 364 | * @name: a pointer to a string containing the name of the file to create. | ||
| 365 | * @mode: the permission that the file should have. | ||
| 366 | * @parent: a pointer to the parent dentry for this file. This should be a | ||
| 367 | * directory dentry if set. If this parameter is NULL, then the | ||
| 368 | * file will be created in the root of the tracefs filesystem. | ||
| 369 | * @data: a pointer to something that the caller will want to get to later | ||
| 370 | * on. The inode.i_private pointer will point to this value on | ||
| 371 | * the open() call. | ||
| 372 | * @fops: a pointer to a struct file_operations that should be used for | ||
| 373 | * this file. | ||
| 374 | * | ||
| 375 | * This is the basic "create a file" function for tracefs. It allows for a | ||
| 376 | * wide range of flexibility in creating a file, or a directory (if you want | ||
| 377 | * to create a directory, the tracefs_create_dir() function is | ||
| 378 | * recommended to be used instead.) | ||
| 379 | * | ||
| 380 | * This function will return a pointer to a dentry if it succeeds. This | ||
| 381 | * pointer must be passed to the tracefs_remove() function when the file is | ||
| 382 | * to be removed (no automatic cleanup happens if your module is unloaded, | ||
| 383 | * you are responsible here.) If an error occurs, %NULL will be returned. | ||
| 384 | * | ||
| 385 | * If tracefs is not enabled in the kernel, the value -%ENODEV will be | ||
| 386 | * returned. | ||
| 387 | */ | ||
| 388 | struct dentry *tracefs_create_file(const char *name, umode_t mode, | ||
| 389 | struct dentry *parent, void *data, | ||
| 390 | const struct file_operations *fops) | ||
| 391 | { | ||
| 392 | struct dentry *dentry; | ||
| 393 | struct inode *inode; | ||
| 394 | |||
| 395 | if (!(mode & S_IFMT)) | ||
| 396 | mode |= S_IFREG; | ||
| 397 | BUG_ON(!S_ISREG(mode)); | ||
| 398 | dentry = start_creating(name, parent); | ||
| 399 | |||
| 400 | if (IS_ERR(dentry)) | ||
| 401 | return NULL; | ||
| 402 | |||
| 403 | inode = tracefs_get_inode(dentry->d_sb); | ||
| 404 | if (unlikely(!inode)) | ||
| 405 | return failed_creating(dentry); | ||
| 406 | |||
| 407 | inode->i_mode = mode; | ||
| 408 | inode->i_fop = fops ? fops : &tracefs_file_operations; | ||
| 409 | inode->i_private = data; | ||
| 410 | d_instantiate(dentry, inode); | ||
| 411 | fsnotify_create(dentry->d_parent->d_inode, dentry); | ||
| 412 | return end_creating(dentry); | ||
| 413 | } | ||
| 414 | |||
| 415 | static struct dentry *__create_dir(const char *name, struct dentry *parent, | ||
| 416 | const struct inode_operations *ops) | ||
| 417 | { | ||
| 418 | struct dentry *dentry = start_creating(name, parent); | ||
| 419 | struct inode *inode; | ||
| 420 | |||
| 421 | if (IS_ERR(dentry)) | ||
| 422 | return NULL; | ||
| 423 | |||
| 424 | inode = tracefs_get_inode(dentry->d_sb); | ||
| 425 | if (unlikely(!inode)) | ||
| 426 | return failed_creating(dentry); | ||
| 427 | |||
| 428 | inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO; | ||
| 429 | inode->i_op = ops; | ||
| 430 | inode->i_fop = &simple_dir_operations; | ||
| 431 | |||
| 432 | /* directory inodes start off with i_nlink == 2 (for "." entry) */ | ||
| 433 | inc_nlink(inode); | ||
| 434 | d_instantiate(dentry, inode); | ||
| 435 | inc_nlink(dentry->d_parent->d_inode); | ||
| 436 | fsnotify_mkdir(dentry->d_parent->d_inode, dentry); | ||
| 437 | return end_creating(dentry); | ||
| 438 | } | ||
| 439 | |||
| 440 | /** | ||
| 441 | * tracefs_create_dir - create a directory in the tracefs filesystem | ||
| 442 | * @name: a pointer to a string containing the name of the directory to | ||
| 443 | * create. | ||
| 444 | * @parent: a pointer to the parent dentry for this file. This should be a | ||
| 445 | * directory dentry if set. If this parameter is NULL, then the | ||
| 446 | * directory will be created in the root of the tracefs filesystem. | ||
| 447 | * | ||
| 448 | * This function creates a directory in tracefs with the given name. | ||
| 449 | * | ||
| 450 | * This function will return a pointer to a dentry if it succeeds. This | ||
| 451 | * pointer must be passed to the tracefs_remove() function when the file is | ||
| 452 | * to be removed. If an error occurs, %NULL will be returned. | ||
| 453 | * | ||
| 454 | * If tracing is not enabled in the kernel, the value -%ENODEV will be | ||
| 455 | * returned. | ||
| 456 | */ | ||
| 457 | struct dentry *tracefs_create_dir(const char *name, struct dentry *parent) | ||
| 458 | { | ||
| 459 | return __create_dir(name, parent, &simple_dir_inode_operations); | ||
| 460 | } | ||
| 461 | |||
| 462 | /** | ||
| 463 | * tracefs_create_instance_dir - create the tracing instances directory | ||
| 464 | * @name: The name of the instances directory to create | ||
| 465 | * @parent: The parent directory that the instances directory will exist | ||
| 466 | * @mkdir: The function to call when a mkdir is performed. | ||
| 467 | * @rmdir: The function to call when a rmdir is performed. | ||
| 468 | * | ||
| 469 | * Only one instances directory is allowed. | ||
| 470 | * | ||
| 471 | * The instances directory is special as it allows for mkdir and rmdir to | ||
| 472 | * be done by userspace. When a mkdir or rmdir is performed, the inode | ||
| 473 | * locks are released and the methods passed in (@mkdir and @rmdir) are | ||
| 474 | * called without locks and with the name of the directory being created | ||
| 475 | * within the instances directory. | ||
| 476 | * | ||
| 477 | * Returns the dentry of the instances directory. | ||
| 478 | */ | ||
| 479 | struct dentry *tracefs_create_instance_dir(const char *name, struct dentry *parent, | ||
| 480 | int (*mkdir)(const char *name), | ||
| 481 | int (*rmdir)(const char *name)) | ||
| 482 | { | ||
| 483 | struct dentry *dentry; | ||
| 484 | |||
| 485 | /* Only allow one instance of the instances directory. */ | ||
| 486 | if (WARN_ON(tracefs_ops.mkdir || tracefs_ops.rmdir)) | ||
| 487 | return NULL; | ||
| 488 | |||
| 489 | dentry = __create_dir(name, parent, &tracefs_dir_inode_operations); | ||
| 490 | if (!dentry) | ||
| 491 | return NULL; | ||
| 492 | |||
| 493 | tracefs_ops.mkdir = mkdir; | ||
| 494 | tracefs_ops.rmdir = rmdir; | ||
| 495 | |||
| 496 | return dentry; | ||
| 497 | } | ||
| 498 | |||
| 499 | static inline int tracefs_positive(struct dentry *dentry) | ||
| 500 | { | ||
| 501 | return dentry->d_inode && !d_unhashed(dentry); | ||
| 502 | } | ||
| 503 | |||
| 504 | static int __tracefs_remove(struct dentry *dentry, struct dentry *parent) | ||
| 505 | { | ||
| 506 | int ret = 0; | ||
| 507 | |||
| 508 | if (tracefs_positive(dentry)) { | ||
| 509 | if (dentry->d_inode) { | ||
| 510 | dget(dentry); | ||
| 511 | switch (dentry->d_inode->i_mode & S_IFMT) { | ||
| 512 | case S_IFDIR: | ||
| 513 | ret = simple_rmdir(parent->d_inode, dentry); | ||
| 514 | break; | ||
| 515 | default: | ||
| 516 | simple_unlink(parent->d_inode, dentry); | ||
| 517 | break; | ||
| 518 | } | ||
| 519 | if (!ret) | ||
| 520 | d_delete(dentry); | ||
| 521 | dput(dentry); | ||
| 522 | } | ||
| 523 | } | ||
| 524 | return ret; | ||
| 525 | } | ||
| 526 | |||
| 527 | /** | ||
| 528 | * tracefs_remove - removes a file or directory from the tracefs filesystem | ||
| 529 | * @dentry: a pointer to the dentry of the file or directory to be | ||
| 530 | * removed. | ||
| 531 | * | ||
| 532 | * This function removes a file or directory in tracefs that was previously | ||
| 533 | * created with a call to another tracefs function (like | ||
| 534 | * tracefs_create_file() or variants thereof.) | ||
| 535 | */ | ||
| 536 | void tracefs_remove(struct dentry *dentry) | ||
| 537 | { | ||
| 538 | struct dentry *parent; | ||
| 539 | int ret; | ||
| 540 | |||
| 541 | if (IS_ERR_OR_NULL(dentry)) | ||
| 542 | return; | ||
| 543 | |||
| 544 | parent = dentry->d_parent; | ||
| 545 | if (!parent || !parent->d_inode) | ||
| 546 | return; | ||
| 547 | |||
| 548 | mutex_lock(&parent->d_inode->i_mutex); | ||
| 549 | ret = __tracefs_remove(dentry, parent); | ||
| 550 | mutex_unlock(&parent->d_inode->i_mutex); | ||
| 551 | if (!ret) | ||
| 552 | simple_release_fs(&tracefs_mount, &tracefs_mount_count); | ||
| 553 | } | ||
| 554 | |||
| 555 | /** | ||
| 556 | * tracefs_remove_recursive - recursively removes a directory | ||
| 557 | * @dentry: a pointer to the dentry of the directory to be removed. | ||
| 558 | * | ||
| 559 | * This function recursively removes a directory tree in tracefs that | ||
| 560 | * was previously created with a call to another tracefs function | ||
| 561 | * (like tracefs_create_file() or variants thereof.) | ||
| 562 | */ | ||
| 563 | void tracefs_remove_recursive(struct dentry *dentry) | ||
| 564 | { | ||
| 565 | struct dentry *child, *parent; | ||
| 566 | |||
| 567 | if (IS_ERR_OR_NULL(dentry)) | ||
| 568 | return; | ||
| 569 | |||
| 570 | parent = dentry->d_parent; | ||
| 571 | if (!parent || !parent->d_inode) | ||
| 572 | return; | ||
| 573 | |||
| 574 | parent = dentry; | ||
| 575 | down: | ||
| 576 | mutex_lock(&parent->d_inode->i_mutex); | ||
| 577 | loop: | ||
| 578 | /* | ||
| 579 | * The parent->d_subdirs is protected by the d_lock. Outside that | ||
| 580 | * lock, the child can be unlinked and set to be freed which can | ||
| 581 | * use the d_u.d_child as the rcu head and corrupt this list. | ||
| 582 | */ | ||
| 583 | spin_lock(&parent->d_lock); | ||
| 584 | list_for_each_entry(child, &parent->d_subdirs, d_child) { | ||
| 585 | if (!tracefs_positive(child)) | ||
| 586 | continue; | ||
| 587 | |||
| 588 | /* perhaps simple_empty(child) makes more sense */ | ||
| 589 | if (!list_empty(&child->d_subdirs)) { | ||
| 590 | spin_unlock(&parent->d_lock); | ||
| 591 | mutex_unlock(&parent->d_inode->i_mutex); | ||
| 592 | parent = child; | ||
| 593 | goto down; | ||
| 594 | } | ||
| 595 | |||
| 596 | spin_unlock(&parent->d_lock); | ||
| 597 | |||
| 598 | if (!__tracefs_remove(child, parent)) | ||
| 599 | simple_release_fs(&tracefs_mount, &tracefs_mount_count); | ||
| 600 | |||
| 601 | /* | ||
| 602 | * The parent->d_lock protects against the child unlinking | ||
| 603 | * from d_subdirs. When releasing the parent->d_lock we can | ||
| 604 | * no longer trust that the next pointer is valid. | ||
| 605 | * Restart the loop. We'll skip this one with the | ||
| 606 | * tracefs_positive() check. | ||
| 607 | */ | ||
| 608 | goto loop; | ||
| 609 | } | ||
| 610 | spin_unlock(&parent->d_lock); | ||
| 611 | |||
| 612 | mutex_unlock(&parent->d_inode->i_mutex); | ||
| 613 | child = parent; | ||
| 614 | parent = parent->d_parent; | ||
| 615 | mutex_lock(&parent->d_inode->i_mutex); | ||
| 616 | |||
| 617 | if (child != dentry) | ||
| 618 | /* go up */ | ||
| 619 | goto loop; | ||
| 620 | |||
| 621 | if (!__tracefs_remove(child, parent)) | ||
| 622 | simple_release_fs(&tracefs_mount, &tracefs_mount_count); | ||
| 623 | mutex_unlock(&parent->d_inode->i_mutex); | ||
| 624 | } | ||
| 625 | |||
| 626 | /** | ||
| 627 | * tracefs_initialized - Tells whether tracefs has been registered | ||
| 628 | */ | ||
| 629 | bool tracefs_initialized(void) | ||
| 630 | { | ||
| 631 | return tracefs_registered; | ||
| 632 | } | ||
| 633 | |||
| 634 | static struct kobject *trace_kobj; | ||
| 635 | |||
| 636 | static int __init tracefs_init(void) | ||
| 637 | { | ||
| 638 | int retval; | ||
| 639 | |||
| 640 | trace_kobj = kobject_create_and_add("tracing", kernel_kobj); | ||
| 641 | if (!trace_kobj) | ||
| 642 | return -EINVAL; | ||
| 643 | |||
| 644 | retval = register_filesystem(&trace_fs_type); | ||
| 645 | if (!retval) | ||
| 646 | tracefs_registered = true; | ||
| 647 | |||
| 648 | return retval; | ||
| 649 | } | ||
| 650 | core_initcall(tracefs_init); | ||
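
The new tracefs API is deliberately debugfs-like: callers create directories and files under the tracefs mount and tear them down with tracefs_remove() or tracefs_remove_recursive(). A minimal sketch of a built-in user based only on the functions added above; the "example" and "control" names are placeholders:

#include <linux/errno.h>
#include <linux/init.h>
#include <linux/tracefs.h>

static struct dentry *example_dir;

/* "example" and "control" are placeholder names, not part of tracefs. */
static int __init example_tracefs_setup(void)
{
	example_dir = tracefs_create_dir("example", NULL);
	if (!example_dir)
		return -ENOMEM;

	/* A NULL fops falls back to the default no-op file operations. */
	if (!tracefs_create_file("control", 0644, example_dir, NULL, NULL)) {
		tracefs_remove(example_dir);
		return -ENOMEM;
	}

	return 0;
}
fs_initcall(example_tracefs_setup);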
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index e627c0acf626..c3d15fe83403 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c | |||
| @@ -50,7 +50,6 @@ | |||
| 50 | */ | 50 | */ |
| 51 | 51 | ||
| 52 | #include "ubifs.h" | 52 | #include "ubifs.h" |
| 53 | #include <linux/aio.h> | ||
| 54 | #include <linux/mount.h> | 53 | #include <linux/mount.h> |
| 55 | #include <linux/namei.h> | 54 | #include <linux/namei.h> |
| 56 | #include <linux/slab.h> | 55 | #include <linux/slab.h> |
diff --git a/fs/udf/file.c b/fs/udf/file.c index 08f3555fbeac..7f885cc8b0b7 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c | |||
| @@ -34,7 +34,7 @@ | |||
| 34 | #include <linux/errno.h> | 34 | #include <linux/errno.h> |
| 35 | #include <linux/pagemap.h> | 35 | #include <linux/pagemap.h> |
| 36 | #include <linux/buffer_head.h> | 36 | #include <linux/buffer_head.h> |
| 37 | #include <linux/aio.h> | 37 | #include <linux/uio.h> |
| 38 | 38 | ||
| 39 | #include "udf_i.h" | 39 | #include "udf_i.h" |
| 40 | #include "udf_sb.h" | 40 | #include "udf_sb.h" |
| @@ -122,7 +122,7 @@ static ssize_t udf_file_write_iter(struct kiocb *iocb, struct iov_iter *from) | |||
| 122 | struct file *file = iocb->ki_filp; | 122 | struct file *file = iocb->ki_filp; |
| 123 | struct inode *inode = file_inode(file); | 123 | struct inode *inode = file_inode(file); |
| 124 | int err, pos; | 124 | int err, pos; |
| 125 | size_t count = iocb->ki_nbytes; | 125 | size_t count = iov_iter_count(from); |
| 126 | struct udf_inode_info *iinfo = UDF_I(inode); | 126 | struct udf_inode_info *iinfo = UDF_I(inode); |
| 127 | 127 | ||
| 128 | mutex_lock(&inode->i_mutex); | 128 | mutex_lock(&inode->i_mutex); |
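
The udf hunk shows the common conversion after the ki_nbytes removal: the request length is taken from the iterator itself via iov_iter_count(). A generic sketch of a ->write_iter method reading the length that way (not udf's actual code; this one just delegates to the stock generic path):

#include <linux/fs.h>
#include <linux/uio.h>

static ssize_t example_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	/* The request length now comes from the iterator, not ki_nbytes. */
	if (!iov_iter_count(from))
		return 0;

	return generic_file_write_iter(iocb, from);
}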
diff --git a/fs/udf/inode.c b/fs/udf/inode.c index a445d599098d..9c1fbd23913d 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c | |||
| @@ -38,7 +38,7 @@ | |||
| 38 | #include <linux/slab.h> | 38 | #include <linux/slab.h> |
| 39 | #include <linux/crc-itu-t.h> | 39 | #include <linux/crc-itu-t.h> |
| 40 | #include <linux/mpage.h> | 40 | #include <linux/mpage.h> |
| 41 | #include <linux/aio.h> | 41 | #include <linux/uio.h> |
| 42 | 42 | ||
| 43 | #include "udf_i.h" | 43 | #include "udf_i.h" |
| 44 | #include "udf_sb.h" | 44 | #include "udf_sb.h" |
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 3a9b7a1b8704..4f8cdc59bc38 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c | |||
| @@ -31,7 +31,6 @@ | |||
| 31 | #include "xfs_bmap.h" | 31 | #include "xfs_bmap.h" |
| 32 | #include "xfs_bmap_util.h" | 32 | #include "xfs_bmap_util.h" |
| 33 | #include "xfs_bmap_btree.h" | 33 | #include "xfs_bmap_btree.h" |
| 34 | #include <linux/aio.h> | ||
| 35 | #include <linux/gfp.h> | 34 | #include <linux/gfp.h> |
| 36 | #include <linux/mpage.h> | 35 | #include <linux/mpage.h> |
| 37 | #include <linux/pagevec.h> | 36 | #include <linux/pagevec.h> |
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index a2e1cb8a568b..f44212fae653 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c | |||
| @@ -38,7 +38,6 @@ | |||
| 38 | #include "xfs_icache.h" | 38 | #include "xfs_icache.h" |
| 39 | #include "xfs_pnfs.h" | 39 | #include "xfs_pnfs.h" |
| 40 | 40 | ||
| 41 | #include <linux/aio.h> | ||
| 42 | #include <linux/dcache.h> | 41 | #include <linux/dcache.h> |
| 43 | #include <linux/falloc.h> | 42 | #include <linux/falloc.h> |
| 44 | #include <linux/pagevec.h> | 43 | #include <linux/pagevec.h> |
