Diffstat (limited to 'fs')
 fs/aio.c                      |  70
 fs/btrfs/extent_io.c          |  39
 fs/btrfs/extent_io.h          |   3
 fs/btrfs/ioctl.c              | 147
 fs/btrfs/qgroup.c             |   4
 fs/btrfs/reada.c              |   9
 fs/btrfs/tests/btrfs-tests.c  |   2
 fs/btrfs/tests/qgroup-tests.c |   2
 fs/btrfs/transaction.c        |  12
 fs/ceph/acl.c                 |   6
 fs/ceph/addr.c                |  17
 fs/ceph/caps.c                | 246
 fs/ceph/export.c              |   2
 fs/ceph/inode.c               | 247
 fs/ceph/mds_client.c          |   9
 fs/ceph/mds_client.h          |   1
 fs/ceph/super.h               |  13
 fs/dlm/lowcomms.c             |   5
 fs/eventpoll.c                |   4
19 files changed, 535 insertions, 303 deletions
diff --git a/fs/aio.c b/fs/aio.c
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -477,7 +477,7 @@ void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel)
 }
 EXPORT_SYMBOL(kiocb_set_cancel_fn);
 
-static int kiocb_cancel(struct kioctx *ctx, struct kiocb *kiocb)
+static int kiocb_cancel(struct kiocb *kiocb)
 {
     kiocb_cancel_fn *old, *cancel;
 
@@ -538,7 +538,7 @@ static void free_ioctx_users(struct percpu_ref *ref)
                        struct kiocb, ki_list);
 
         list_del_init(&req->ki_list);
-        kiocb_cancel(ctx, req);
+        kiocb_cancel(req);
     }
 
     spin_unlock_irq(&ctx->ctx_lock);
@@ -727,42 +727,42 @@ err:
  * when the processes owning a context have all exited to encourage
  * the rapid destruction of the kioctx.
  */
-static void kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
+static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
         struct completion *requests_done)
 {
-    if (!atomic_xchg(&ctx->dead, 1)) {
-        struct kioctx_table *table;
+    struct kioctx_table *table;
 
-        spin_lock(&mm->ioctx_lock);
-        rcu_read_lock();
-        table = rcu_dereference(mm->ioctx_table);
+    if (atomic_xchg(&ctx->dead, 1))
+        return -EINVAL;
 
-        WARN_ON(ctx != table->table[ctx->id]);
-        table->table[ctx->id] = NULL;
-        rcu_read_unlock();
-        spin_unlock(&mm->ioctx_lock);
+    spin_lock(&mm->ioctx_lock);
+    rcu_read_lock();
+    table = rcu_dereference(mm->ioctx_table);
 
-        /* percpu_ref_kill() will do the necessary call_rcu() */
-        wake_up_all(&ctx->wait);
+    WARN_ON(ctx != table->table[ctx->id]);
+    table->table[ctx->id] = NULL;
+    rcu_read_unlock();
+    spin_unlock(&mm->ioctx_lock);
 
-        /*
-         * It'd be more correct to do this in free_ioctx(), after all
-         * the outstanding kiocbs have finished - but by then io_destroy
-         * has already returned, so io_setup() could potentially return
-         * -EAGAIN with no ioctxs actually in use (as far as userspace
-         * could tell).
-         */
-        aio_nr_sub(ctx->max_reqs);
+    /* percpu_ref_kill() will do the necessary call_rcu() */
+    wake_up_all(&ctx->wait);
 
-        if (ctx->mmap_size)
-            vm_munmap(ctx->mmap_base, ctx->mmap_size);
+    /*
+     * It'd be more correct to do this in free_ioctx(), after all
+     * the outstanding kiocbs have finished - but by then io_destroy
+     * has already returned, so io_setup() could potentially return
+     * -EAGAIN with no ioctxs actually in use (as far as userspace
+     * could tell).
+     */
+    aio_nr_sub(ctx->max_reqs);
 
-        ctx->requests_done = requests_done;
-        percpu_ref_kill(&ctx->users);
-    } else {
-        if (requests_done)
-            complete(requests_done);
-    }
+    if (ctx->mmap_size)
+        vm_munmap(ctx->mmap_base, ctx->mmap_size);
+
+    ctx->requests_done = requests_done;
+    percpu_ref_kill(&ctx->users);
+    return 0;
 }
 
 /* wait_on_sync_kiocb:
@@ -1219,21 +1219,23 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx)
     if (likely(NULL != ioctx)) {
         struct completion requests_done =
             COMPLETION_INITIALIZER_ONSTACK(requests_done);
+        int ret;
 
         /* Pass requests_done to kill_ioctx() where it can be set
          * in a thread-safe way. If we try to set it here then we have
          * a race condition if two io_destroy() calls run simultaneously.
          */
-        kill_ioctx(current->mm, ioctx, &requests_done);
+        ret = kill_ioctx(current->mm, ioctx, &requests_done);
         percpu_ref_put(&ioctx->users);
 
         /* Wait until all IO for the context are done. Otherwise the
          * kernel keeps using user-space buffers even if the user thinks
          * the context is destroyed.
          */
-        wait_for_completion(&requests_done);
+        if (!ret)
+            wait_for_completion(&requests_done);
 
-        return 0;
+        return ret;
     }
     pr_debug("EINVAL: io_destroy: invalid context id\n");
     return -EINVAL;
@@ -1595,7 +1597,7 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
 
     kiocb = lookup_kiocb(ctx, iocb, key);
     if (kiocb)
-        ret = kiocb_cancel(ctx, kiocb);
+        ret = kiocb_cancel(kiocb);
     else
         ret = -EINVAL;
 
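The user-visible effect of the fs/aio.c hunks: kill_ioctx() now reports whether this caller actually tore the context down, and io_destroy() waits for outstanding requests only when it won that race; a losing caller gets -EINVAL instead of a premature success. A minimal userspace sketch (raw syscalls, sequential rather than truly racing for brevity; error handling elided):

    #include <stdio.h>
    #include <unistd.h>
    #include <sys/syscall.h>
    #include <linux/aio_abi.h>

    int main(void)
    {
        aio_context_t ctx = 0;

        if (syscall(__NR_io_setup, 128, &ctx))
            return 1;
        /* the winning destroyer tears the context down and waits
         * until all outstanding requests have completed */
        syscall(__NR_io_destroy, ctx);
        /* destroying the same, already-dead context fails; with this
         * patch a concurrent destroyer also sees EINVAL instead of
         * returning success before the requests are done */
        if (syscall(__NR_io_destroy, ctx))
            perror("io_destroy");   /* "Invalid argument" */
        return 0;
    }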
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index f25a9092b946..a389820d158b 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2354,7 +2354,7 @@ int end_extent_writepage(struct page *page, int err, u64 start, u64 end)
 {
     int uptodate = (err == 0);
     struct extent_io_tree *tree;
-    int ret;
+    int ret = 0;
 
     tree = &BTRFS_I(page->mapping->host)->io_tree;
 
@@ -5068,6 +5068,43 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv,
     }
 }
 
+int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dstv,
+                               unsigned long start,
+                               unsigned long len)
+{
+    size_t cur;
+    size_t offset;
+    struct page *page;
+    char *kaddr;
+    char __user *dst = (char __user *)dstv;
+    size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
+    unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
+    int ret = 0;
+
+    WARN_ON(start > eb->len);
+    WARN_ON(start + len > eb->start + eb->len);
+
+    offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
+
+    while (len > 0) {
+        page = extent_buffer_page(eb, i);
+
+        cur = min(len, (PAGE_CACHE_SIZE - offset));
+        kaddr = page_address(page);
+        if (copy_to_user(dst, kaddr + offset, cur)) {
+            ret = -EFAULT;
+            break;
+        }
+
+        dst += cur;
+        len -= cur;
+        offset = 0;
+        i++;
+    }
+
+    return ret;
+}
+
 int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
                               unsigned long min_len, char **map,
                               unsigned long *map_start,
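One property of the new helper worth noting (the caller below is an illustrative sketch, not part of the patch): unlike read_extent_buffer(), which memcpy()s into a kernel buffer, read_extent_buffer_to_user() ends in copy_to_user(), which may fault in user pages and therefore must not be called from atomic context; on failure it reports -EFAULT rather than a partial byte count.

    /* hypothetical caller, assuming a sleepable context */
    static int copy_item_to_user(struct extent_buffer *leaf,
                                 unsigned long item_off,
                                 unsigned long item_len,
                                 char __user *ubuf)
    {
        if (read_extent_buffer_to_user(leaf, ubuf, item_off, item_len))
            return -EFAULT;
        return 0;
    }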
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 8b63f2d46518..15ce5f2a2b62 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -304,6 +304,9 @@ int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
 void read_extent_buffer(struct extent_buffer *eb, void *dst,
                         unsigned long start,
                         unsigned long len);
+int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dst,
+                               unsigned long start,
+                               unsigned long len);
 void write_extent_buffer(struct extent_buffer *eb, const void *src,
                          unsigned long start, unsigned long len);
 void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 82c18ba12e3f..0d321c23069a 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1957,7 +1957,8 @@ static noinline int copy_to_sk(struct btrfs_root *root,
                                struct btrfs_path *path,
                                struct btrfs_key *key,
                                struct btrfs_ioctl_search_key *sk,
-                               char *buf,
+                               size_t *buf_size,
+                               char __user *ubuf,
                                unsigned long *sk_offset,
                                int *num_found)
 {
@@ -1989,13 +1990,25 @@ static noinline int copy_to_sk(struct btrfs_root *root,
         if (!key_in_sk(key, sk))
             continue;
 
-        if (sizeof(sh) + item_len > BTRFS_SEARCH_ARGS_BUFSIZE)
+        if (sizeof(sh) + item_len > *buf_size) {
+            if (*num_found) {
+                ret = 1;
+                goto out;
+            }
+
+            /*
+             * return one empty item back for v1, which does not
+             * handle -EOVERFLOW
+             */
+
+            *buf_size = sizeof(sh) + item_len;
             item_len = 0;
+            ret = -EOVERFLOW;
+        }
 
-        if (sizeof(sh) + item_len + *sk_offset >
-            BTRFS_SEARCH_ARGS_BUFSIZE) {
+        if (sizeof(sh) + item_len + *sk_offset > *buf_size) {
             ret = 1;
-            goto overflow;
+            goto out;
         }
 
         sh.objectid = key->objectid;
@@ -2005,20 +2018,33 @@ static noinline int copy_to_sk(struct btrfs_root *root,
         sh.transid = found_transid;
 
         /* copy search result header */
-        memcpy(buf + *sk_offset, &sh, sizeof(sh));
+        if (copy_to_user(ubuf + *sk_offset, &sh, sizeof(sh))) {
+            ret = -EFAULT;
+            goto out;
+        }
+
         *sk_offset += sizeof(sh);
 
         if (item_len) {
-            char *p = buf + *sk_offset;
+            char __user *up = ubuf + *sk_offset;
             /* copy the item */
-            read_extent_buffer(leaf, p,
-                               item_off, item_len);
+            if (read_extent_buffer_to_user(leaf, up,
+                                           item_off, item_len)) {
+                ret = -EFAULT;
+                goto out;
+            }
+
             *sk_offset += item_len;
         }
         (*num_found)++;
 
-        if (*num_found >= sk->nr_items)
-            break;
+        if (ret) /* -EOVERFLOW from above */
+            goto out;
+
+        if (*num_found >= sk->nr_items) {
+            ret = 1;
+            goto out;
+        }
     }
 advance_key:
     ret = 0;
@@ -2033,22 +2059,37 @@ advance_key:
         key->objectid++;
     } else
         ret = 1;
-overflow:
+out:
+    /*
+     *  0: all items from this leaf copied, continue with next
+     *  1: * more items can be copied, but unused buffer is too small
+     *     * all items were found
+     *     Either way, it will stop the loop which iterates to the
+     *     next leaf
+     *  -EOVERFLOW: item was too large for buffer
+     *  -EFAULT: could not copy extent buffer back to userspace
+     */
     return ret;
 }
 
 static noinline int search_ioctl(struct inode *inode,
-                                 struct btrfs_ioctl_search_args *args)
+                                 struct btrfs_ioctl_search_key *sk,
+                                 size_t *buf_size,
+                                 char __user *ubuf)
 {
     struct btrfs_root *root;
     struct btrfs_key key;
     struct btrfs_path *path;
-    struct btrfs_ioctl_search_key *sk = &args->key;
     struct btrfs_fs_info *info = BTRFS_I(inode)->root->fs_info;
     int ret;
     int num_found = 0;
     unsigned long sk_offset = 0;
 
+    if (*buf_size < sizeof(struct btrfs_ioctl_search_header)) {
+        *buf_size = sizeof(struct btrfs_ioctl_search_header);
+        return -EOVERFLOW;
+    }
+
     path = btrfs_alloc_path();
     if (!path)
         return -ENOMEM;
@@ -2082,14 +2123,15 @@ static noinline int search_ioctl(struct inode *inode,
             ret = 0;
             goto err;
         }
-        ret = copy_to_sk(root, path, &key, sk, args->buf,
+        ret = copy_to_sk(root, path, &key, sk, buf_size, ubuf,
                          &sk_offset, &num_found);
         btrfs_release_path(path);
-        if (ret || num_found >= sk->nr_items)
+        if (ret)
             break;
 
     }
-    ret = 0;
+    if (ret > 0)
+        ret = 0;
 err:
     sk->nr_items = num_found;
     btrfs_free_path(path);
@@ -2099,22 +2141,73 @@ err:
 static noinline int btrfs_ioctl_tree_search(struct file *file,
                                             void __user *argp)
 {
-    struct btrfs_ioctl_search_args *args;
-    struct inode *inode;
-    int ret;
+    struct btrfs_ioctl_search_args __user *uargs;
+    struct btrfs_ioctl_search_key sk;
+    struct inode *inode;
+    int ret;
+    size_t buf_size;
 
     if (!capable(CAP_SYS_ADMIN))
         return -EPERM;
 
-    args = memdup_user(argp, sizeof(*args));
-    if (IS_ERR(args))
-        return PTR_ERR(args);
+    uargs = (struct btrfs_ioctl_search_args __user *)argp;
+
+    if (copy_from_user(&sk, &uargs->key, sizeof(sk)))
+        return -EFAULT;
+
+    buf_size = sizeof(uargs->buf);
 
     inode = file_inode(file);
-    ret = search_ioctl(inode, args);
-    if (ret == 0 && copy_to_user(argp, args, sizeof(*args)))
+    ret = search_ioctl(inode, &sk, &buf_size, uargs->buf);
+
+    /*
+     * In the original implementation an overflow is handled by
+     * returning a search header with a len of zero, so reset ret.
+     */
+    if (ret == -EOVERFLOW)
+        ret = 0;
+
+    if (ret == 0 && copy_to_user(&uargs->key, &sk, sizeof(sk)))
         ret = -EFAULT;
-    kfree(args);
+    return ret;
+}
+
+static noinline int btrfs_ioctl_tree_search_v2(struct file *file,
+                                               void __user *argp)
+{
+    struct btrfs_ioctl_search_args_v2 __user *uarg;
+    struct btrfs_ioctl_search_args_v2 args;
+    struct inode *inode;
+    int ret;
+    size_t buf_size;
+    const size_t buf_limit = 16 * 1024 * 1024;
+
+    if (!capable(CAP_SYS_ADMIN))
+        return -EPERM;
+
+    /* copy search header and buffer size */
+    uarg = (struct btrfs_ioctl_search_args_v2 __user *)argp;
+    if (copy_from_user(&args, uarg, sizeof(args)))
+        return -EFAULT;
+
+    buf_size = args.buf_size;
+
+    if (buf_size < sizeof(struct btrfs_ioctl_search_header))
+        return -EOVERFLOW;
+
+    /* limit result size to 16MB */
+    if (buf_size > buf_limit)
+        buf_size = buf_limit;
+
+    inode = file_inode(file);
+    ret = search_ioctl(inode, &args.key, &buf_size,
+                       (char *)(&uarg->buf[0]));
+    if (ret == 0 && copy_to_user(&uarg->key, &args.key, sizeof(args.key)))
+        ret = -EFAULT;
+    else if (ret == -EOVERFLOW &&
+        copy_to_user(&uarg->buf_size, &buf_size, sizeof(buf_size)))
+        ret = -EFAULT;
+
     return ret;
 }
 
@@ -5198,6 +5291,8 @@ long btrfs_ioctl(struct file *file, unsigned int
         return btrfs_ioctl_trans_end(file);
     case BTRFS_IOC_TREE_SEARCH:
         return btrfs_ioctl_tree_search(file, argp);
+    case BTRFS_IOC_TREE_SEARCH_V2:
+        return btrfs_ioctl_tree_search_v2(file, argp);
     case BTRFS_IOC_INO_LOOKUP:
         return btrfs_ioctl_ino_lookup(file, argp);
     case BTRFS_IOC_INO_PATHS:
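Since BTRFS_IOC_TREE_SEARCH_V2 is new in this series, a sketch of driving it from userspace may help. The struct btrfs_ioctl_search_args_v2 layout (key, buf_size, flexible buf[]) and the header providing it come from the UAPI additions that accompany this patch and should be treated as assumptions here; error handling is abbreviated.

    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/ioctl.h>
    #include <linux/btrfs.h>

    static int dump_root_tree(int fd)
    {
        size_t buf_size = 64 * 1024;   /* anything up to the 16MB cap */
        struct btrfs_ioctl_search_args_v2 *args;

        args = calloc(1, sizeof(*args) + buf_size);
        if (!args)
            return -1;
        args->buf_size = buf_size;

        /* match every item of the root tree (min_* fields are zero) */
        args->key.tree_id = 1;              /* BTRFS_ROOT_TREE_OBJECTID */
        args->key.max_objectid = (__u64)-1;
        args->key.max_type = (__u32)-1;
        args->key.max_offset = (__u64)-1;
        args->key.max_transid = (__u64)-1;
        args->key.nr_items = 4096;

        if (ioctl(fd, BTRFS_IOC_TREE_SEARCH_V2, args) < 0) {
            /* on EOVERFLOW the kernel has written the size needed for
             * the too-large item back into args->buf_size */
            perror("BTRFS_IOC_TREE_SEARCH_V2");
            free(args);
            return -1;
        }
        printf("copied %u items\n", args->key.nr_items);
        free(args);
        return 0;
    }

A v1 caller keeps its fixed-size buffer; v2 exists precisely so callers can supply larger buffers and detect items that do not fit.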
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index cf5aead95a7f..98cb6b2630f9 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1798,8 +1798,10 @@ static int qgroup_shared_accounting(struct btrfs_trans_handle *trans,
         return -ENOMEM;
 
     tmp = ulist_alloc(GFP_NOFS);
-    if (!tmp)
+    if (!tmp) {
+        ulist_free(qgroups);
         return -ENOMEM;
+    }
 
     btrfs_get_tree_mod_seq(fs_info, &elem);
     ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr, elem.seq,
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index 30947f923620..09230cf3a244 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -428,8 +428,13 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
             continue;
         }
         if (!dev->bdev) {
-            /* cannot read ahead on missing device */
-            continue;
+            /*
+             * cannot read ahead on a missing device, but for RAID5/6
+             * REQ_GET_READ_MIRRORS returns 1, so don't skip the
+             * missing device in that case.
+             */
+            if (nzones > 1)
+                continue;
         }
         if (dev_replace_is_ongoing &&
             dev == fs_info->dev_replace.tgtdev) {
diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c
index a5dcacb5df9c..9626252ee6b4 100644
--- a/fs/btrfs/tests/btrfs-tests.c
+++ b/fs/btrfs/tests/btrfs-tests.c
@@ -135,7 +135,7 @@ restart:
     radix_tree_for_each_slot(slot, &fs_info->buffer_radix, &iter, 0) {
         struct extent_buffer *eb;
 
-        eb = radix_tree_deref_slot(slot);
+        eb = radix_tree_deref_slot_protected(slot, &fs_info->buffer_lock);
         if (!eb)
             continue;
         /* Shouldn't happen but that kind of thinking creates CVE's */
diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c
index fa691b754aaf..ec3dcb202357 100644
--- a/fs/btrfs/tests/qgroup-tests.c
+++ b/fs/btrfs/tests/qgroup-tests.c
@@ -415,6 +415,8 @@ int btrfs_test_qgroups(void)
         ret = -ENOMEM;
         goto out;
     }
+    btrfs_set_header_level(root->node, 0);
+    btrfs_set_header_nritems(root->node, 0);
     root->alloc_bytenr += 8192;
 
     tmp_root = btrfs_alloc_dummy_root();
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 9630f10f8e1e..511839c04f11 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1284,11 +1284,13 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
         goto fail;
     }
 
-    pending->error = btrfs_qgroup_inherit(trans, fs_info,
-                                          root->root_key.objectid,
-                                          objectid, pending->inherit);
-    if (pending->error)
-        goto no_free_objectid;
+    ret = btrfs_qgroup_inherit(trans, fs_info,
+                               root->root_key.objectid,
+                               objectid, pending->inherit);
+    if (ret) {
+        btrfs_abort_transaction(trans, root, ret);
+        goto fail;
+    }
 
     /* see comments in should_cow_block() */
     set_bit(BTRFS_ROOT_FORCE_COW, &root->state);
diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c
index 21887d63dad5..469f2e8657e8 100644
--- a/fs/ceph/acl.c
+++ b/fs/ceph/acl.c
@@ -104,12 +104,6 @@ int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type)
     umode_t new_mode = inode->i_mode, old_mode = inode->i_mode;
     struct dentry *dentry;
 
-    if (acl) {
-        ret = posix_acl_valid(acl);
-        if (ret < 0)
-            goto out;
-    }
-
     switch (type) {
     case ACL_TYPE_ACCESS:
         name = POSIX_ACL_XATTR_ACCESS;
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 4f3f69079f36..90b3954d48ed 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -211,18 +211,15 @@ static int readpage_nounlock(struct file *filp, struct page *page)
         SetPageError(page);
         ceph_fscache_readpage_cancel(inode, page);
         goto out;
-    } else {
-        if (err < PAGE_CACHE_SIZE) {
-            /* zero fill remainder of page */
-            zero_user_segment(page, err, PAGE_CACHE_SIZE);
-        } else {
-            flush_dcache_page(page);
-        }
     }
-    SetPageUptodate(page);
+    if (err < PAGE_CACHE_SIZE)
+        /* zero fill remainder of page */
+        zero_user_segment(page, err, PAGE_CACHE_SIZE);
+    else
+        flush_dcache_page(page);
 
-    if (err >= 0)
-        ceph_readpage_to_fscache(inode, page);
+    SetPageUptodate(page);
+    ceph_readpage_to_fscache(inode, page);
 
 out:
     return err < 0 ? err : 0;
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index c561b628ebce..1fde164b74b5 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -221,8 +221,8 @@ int ceph_unreserve_caps(struct ceph_mds_client *mdsc,
     return 0;
 }
 
-static struct ceph_cap *get_cap(struct ceph_mds_client *mdsc,
-                                struct ceph_cap_reservation *ctx)
+struct ceph_cap *ceph_get_cap(struct ceph_mds_client *mdsc,
+                              struct ceph_cap_reservation *ctx)
 {
     struct ceph_cap *cap = NULL;
 
@@ -508,15 +508,14 @@ static void __check_cap_issue(struct ceph_inode_info *ci, struct ceph_cap *cap,
  * it is < 0.  (This is so we can atomically add the cap and add an
  * open file reference to it.)
  */
-int ceph_add_cap(struct inode *inode,
+void ceph_add_cap(struct inode *inode,
                  struct ceph_mds_session *session, u64 cap_id,
                  int fmode, unsigned issued, unsigned wanted,
                  unsigned seq, unsigned mseq, u64 realmino, int flags,
-                 struct ceph_cap_reservation *caps_reservation)
+                 struct ceph_cap **new_cap)
 {
     struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
     struct ceph_inode_info *ci = ceph_inode(inode);
-    struct ceph_cap *new_cap = NULL;
     struct ceph_cap *cap;
     int mds = session->s_mds;
     int actual_wanted;
@@ -531,20 +530,10 @@ int ceph_add_cap(struct inode *inode,
     if (fmode >= 0)
         wanted |= ceph_caps_for_mode(fmode);
 
-retry:
-    spin_lock(&ci->i_ceph_lock);
     cap = __get_cap_for_mds(ci, mds);
     if (!cap) {
-        if (new_cap) {
-            cap = new_cap;
-            new_cap = NULL;
-        } else {
-            spin_unlock(&ci->i_ceph_lock);
-            new_cap = get_cap(mdsc, caps_reservation);
-            if (new_cap == NULL)
-                return -ENOMEM;
-            goto retry;
-        }
+        cap = *new_cap;
+        *new_cap = NULL;
 
         cap->issued = 0;
         cap->implemented = 0;
@@ -562,9 +551,6 @@ retry:
         session->s_nr_caps++;
         spin_unlock(&session->s_cap_lock);
     } else {
-        if (new_cap)
-            ceph_put_cap(mdsc, new_cap);
-
         /*
          * auth mds of the inode changed. we received the cap export
          * message, but still haven't received the cap import message.
@@ -626,7 +612,6 @@ retry:
             ci->i_auth_cap = cap;
             cap->mds_wanted = wanted;
         }
-        ci->i_cap_exporting_issued = 0;
     } else {
         WARN_ON(ci->i_auth_cap == cap);
     }
@@ -648,9 +633,6 @@ retry:
 
     if (fmode >= 0)
         __ceph_get_fmode(ci, fmode);
-    spin_unlock(&ci->i_ceph_lock);
-    wake_up_all(&ci->i_cap_wq);
-    return 0;
 }
 
 /*
@@ -685,7 +667,7 @@ static int __cap_is_valid(struct ceph_cap *cap)
  */
 int __ceph_caps_issued(struct ceph_inode_info *ci, int *implemented)
 {
-    int have = ci->i_snap_caps | ci->i_cap_exporting_issued;
+    int have = ci->i_snap_caps;
     struct ceph_cap *cap;
     struct rb_node *p;
 
@@ -900,7 +882,7 @@ int __ceph_caps_mds_wanted(struct ceph_inode_info *ci)
  */
 static int __ceph_is_any_caps(struct ceph_inode_info *ci)
 {
-    return !RB_EMPTY_ROOT(&ci->i_caps) || ci->i_cap_exporting_issued;
+    return !RB_EMPTY_ROOT(&ci->i_caps);
 }
 
 int ceph_is_any_caps(struct inode *inode)
@@ -2397,32 +2379,30 @@ static void invalidate_aliases(struct inode *inode)
  * actually be a revocation if it specifies a smaller cap set.)
  *
  * caller holds s_mutex and i_ceph_lock, we drop both.
- *
- * return value:
- *  0 - ok
- *  1 - check_caps on auth cap only (writeback)
- *  2 - check_caps (ack revoke)
  */
-static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
+static void handle_cap_grant(struct ceph_mds_client *mdsc,
+                             struct inode *inode, struct ceph_mds_caps *grant,
+                             void *snaptrace, int snaptrace_len,
+                             struct ceph_buffer *xattr_buf,
                              struct ceph_mds_session *session,
-                             struct ceph_cap *cap,
-                             struct ceph_buffer *xattr_buf)
-    __releases(ci->i_ceph_lock)
+                             struct ceph_cap *cap, int issued)
+    __releases(ci->i_ceph_lock)
 {
     struct ceph_inode_info *ci = ceph_inode(inode);
     int mds = session->s_mds;
     int seq = le32_to_cpu(grant->seq);
     int newcaps = le32_to_cpu(grant->caps);
-    int issued, implemented, used, wanted, dirty;
+    int used, wanted, dirty;
     u64 size = le64_to_cpu(grant->size);
     u64 max_size = le64_to_cpu(grant->max_size);
     struct timespec mtime, atime, ctime;
     int check_caps = 0;
-    int wake = 0;
-    int writeback = 0;
-    int queue_invalidate = 0;
-    int deleted_inode = 0;
-    int queue_revalidate = 0;
+    bool wake = 0;
+    bool writeback = 0;
+    bool queue_trunc = 0;
+    bool queue_invalidate = 0;
+    bool queue_revalidate = 0;
+    bool deleted_inode = 0;
 
     dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n",
          inode, cap, mds, seq, ceph_cap_string(newcaps));
@@ -2466,16 +2446,13 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
     }
 
     /* side effects now are allowed */
-
-    issued = __ceph_caps_issued(ci, &implemented);
-    issued |= implemented | __ceph_caps_dirty(ci);
-
     cap->cap_gen = session->s_cap_gen;
     cap->seq = seq;
 
     __check_cap_issue(ci, cap, newcaps);
 
-    if ((issued & CEPH_CAP_AUTH_EXCL) == 0) {
+    if ((newcaps & CEPH_CAP_AUTH_SHARED) &&
+        (issued & CEPH_CAP_AUTH_EXCL) == 0) {
         inode->i_mode = le32_to_cpu(grant->mode);
         inode->i_uid = make_kuid(&init_user_ns, le32_to_cpu(grant->uid));
         inode->i_gid = make_kgid(&init_user_ns, le32_to_cpu(grant->gid));
@@ -2484,7 +2461,8 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
              from_kgid(&init_user_ns, inode->i_gid));
     }
 
-    if ((issued & CEPH_CAP_LINK_EXCL) == 0) {
+    if ((newcaps & CEPH_CAP_AUTH_SHARED) &&
+        (issued & CEPH_CAP_LINK_EXCL) == 0) {
         set_nlink(inode, le32_to_cpu(grant->nlink));
         if (inode->i_nlink == 0 &&
             (newcaps & (CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL)))
@@ -2511,30 +2489,35 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
     if ((issued & CEPH_CAP_FILE_CACHE) && ci->i_rdcache_gen > 1)
         queue_revalidate = 1;
 
-    /* size/ctime/mtime/atime? */
-    ceph_fill_file_size(inode, issued,
-                        le32_to_cpu(grant->truncate_seq),
-                        le64_to_cpu(grant->truncate_size), size);
-    ceph_decode_timespec(&mtime, &grant->mtime);
-    ceph_decode_timespec(&atime, &grant->atime);
-    ceph_decode_timespec(&ctime, &grant->ctime);
-    ceph_fill_file_time(inode, issued,
-                        le32_to_cpu(grant->time_warp_seq), &ctime, &mtime,
-                        &atime);
-
-
-    /* file layout may have changed */
-    ci->i_layout = grant->layout;
-
-    /* max size increase? */
-    if (ci->i_auth_cap == cap && max_size != ci->i_max_size) {
-        dout("max_size %lld -> %llu\n", ci->i_max_size, max_size);
-        ci->i_max_size = max_size;
-        if (max_size >= ci->i_wanted_max_size) {
-            ci->i_wanted_max_size = 0;  /* reset */
-            ci->i_requested_max_size = 0;
+    if (newcaps & CEPH_CAP_ANY_RD) {
+        /* ctime/mtime/atime? */
+        ceph_decode_timespec(&mtime, &grant->mtime);
+        ceph_decode_timespec(&atime, &grant->atime);
+        ceph_decode_timespec(&ctime, &grant->ctime);
+        ceph_fill_file_time(inode, issued,
+                            le32_to_cpu(grant->time_warp_seq),
+                            &ctime, &mtime, &atime);
+    }
+
+    if (newcaps & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR)) {
+        /* file layout may have changed */
+        ci->i_layout = grant->layout;
+        /* size/truncate_seq? */
+        queue_trunc = ceph_fill_file_size(inode, issued,
+                                le32_to_cpu(grant->truncate_seq),
+                                le64_to_cpu(grant->truncate_size),
+                                size);
+        /* max size increase? */
+        if (ci->i_auth_cap == cap && max_size != ci->i_max_size) {
+            dout("max_size %lld -> %llu\n",
+                 ci->i_max_size, max_size);
+            ci->i_max_size = max_size;
+            if (max_size >= ci->i_wanted_max_size) {
+                ci->i_wanted_max_size = 0;  /* reset */
+                ci->i_requested_max_size = 0;
+            }
+            wake = 1;
         }
-        wake = 1;
     }
 
     /* check cap bits */
@@ -2595,6 +2578,23 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
 
     spin_unlock(&ci->i_ceph_lock);
 
+    if (le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT) {
+        down_write(&mdsc->snap_rwsem);
+        ceph_update_snap_trace(mdsc, snaptrace,
+                               snaptrace + snaptrace_len, false);
+        downgrade_write(&mdsc->snap_rwsem);
+        kick_flushing_inode_caps(mdsc, session, inode);
+        up_read(&mdsc->snap_rwsem);
+        if (newcaps & ~issued)
+            wake = 1;
+    }
+
+    if (queue_trunc) {
+        ceph_queue_vmtruncate(inode);
+        ceph_queue_revalidate(inode);
+    } else if (queue_revalidate)
+        ceph_queue_revalidate(inode);
+
     if (writeback)
         /*
          * queue inode for writeback: we can't actually call
@@ -2606,8 +2606,6 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
         ceph_queue_invalidate(inode);
     if (deleted_inode)
         invalidate_aliases(inode);
-    if (queue_revalidate)
-        ceph_queue_revalidate(inode);
     if (wake)
         wake_up_all(&ci->i_cap_wq);
 
@@ -2784,7 +2782,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
 {
     struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
     struct ceph_mds_session *tsession = NULL;
-    struct ceph_cap *cap, *tcap;
+    struct ceph_cap *cap, *tcap, *new_cap = NULL;
     struct ceph_inode_info *ci = ceph_inode(inode);
     u64 t_cap_id;
     unsigned mseq = le32_to_cpu(ex->migrate_seq);
@@ -2807,7 +2805,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
 retry:
     spin_lock(&ci->i_ceph_lock);
     cap = __get_cap_for_mds(ci, mds);
-    if (!cap)
+    if (!cap || cap->cap_id != le64_to_cpu(ex->cap_id))
         goto out_unlock;
 
     if (target < 0) {
@@ -2846,15 +2844,14 @@ retry:
     }
     __ceph_remove_cap(cap, false);
     goto out_unlock;
-    }
-
-    if (tsession) {
-        int flag = (cap == ci->i_auth_cap) ? CEPH_CAP_FLAG_AUTH : 0;
-        spin_unlock(&ci->i_ceph_lock);
+    } else if (tsession) {
         /* add placeholder for the export target */
+        int flag = (cap == ci->i_auth_cap) ? CEPH_CAP_FLAG_AUTH : 0;
         ceph_add_cap(inode, tsession, t_cap_id, -1, issued, 0,
-                     t_seq - 1, t_mseq, (u64)-1, flag, NULL);
-        goto retry;
+                     t_seq - 1, t_mseq, (u64)-1, flag, &new_cap);
+
+        __ceph_remove_cap(cap, false);
+        goto out_unlock;
     }
 
     spin_unlock(&ci->i_ceph_lock);
@@ -2873,6 +2870,7 @@ retry:
                               SINGLE_DEPTH_NESTING);
         }
         ceph_add_cap_releases(mdsc, tsession);
+        new_cap = ceph_get_cap(mdsc, NULL);
     } else {
         WARN_ON(1);
         tsession = NULL;
@@ -2887,24 +2885,27 @@ out_unlock:
         mutex_unlock(&tsession->s_mutex);
         ceph_put_mds_session(tsession);
     }
+    if (new_cap)
+        ceph_put_cap(mdsc, new_cap);
 }
 
 /*
- * Handle cap IMPORT.  If there are temp bits from an older EXPORT,
- * clean them up.
+ * Handle cap IMPORT.
  *
- * caller holds s_mutex.
+ * caller holds s_mutex. acquires i_ceph_lock
  */
 static void handle_cap_import(struct ceph_mds_client *mdsc,
                               struct inode *inode, struct ceph_mds_caps *im,
                               struct ceph_mds_cap_peer *ph,
                               struct ceph_mds_session *session,
-                              void *snaptrace, int snaptrace_len)
+                              struct ceph_cap **target_cap, int *old_issued)
+    __acquires(ci->i_ceph_lock)
 {
     struct ceph_inode_info *ci = ceph_inode(inode);
-    struct ceph_cap *cap;
+    struct ceph_cap *cap, *ocap, *new_cap = NULL;
     int mds = session->s_mds;
-    unsigned issued = le32_to_cpu(im->caps);
+    int issued;
+    unsigned caps = le32_to_cpu(im->caps);
     unsigned wanted = le32_to_cpu(im->wanted);
     unsigned seq = le32_to_cpu(im->seq);
     unsigned mseq = le32_to_cpu(im->migrate_seq);
@@ -2924,40 +2925,52 @@ static void handle_cap_import(struct ceph_mds_client *mdsc,
     dout("handle_cap_import inode %p ci %p mds%d mseq %d peer %d\n",
          inode, ci, mds, mseq, peer);
 
+retry:
     spin_lock(&ci->i_ceph_lock);
-    cap = peer >= 0 ? __get_cap_for_mds(ci, peer) : NULL;
-    if (cap && cap->cap_id == p_cap_id) {
+    cap = __get_cap_for_mds(ci, mds);
+    if (!cap) {
+        if (!new_cap) {
+            spin_unlock(&ci->i_ceph_lock);
+            new_cap = ceph_get_cap(mdsc, NULL);
+            goto retry;
+        }
+        cap = new_cap;
+    } else {
+        if (new_cap) {
+            ceph_put_cap(mdsc, new_cap);
+            new_cap = NULL;
+        }
+    }
+
+    __ceph_caps_issued(ci, &issued);
+    issued |= __ceph_caps_dirty(ci);
+
+    ceph_add_cap(inode, session, cap_id, -1, caps, wanted, seq, mseq,
+                 realmino, CEPH_CAP_FLAG_AUTH, &new_cap);
+
+    ocap = peer >= 0 ? __get_cap_for_mds(ci, peer) : NULL;
+    if (ocap && ocap->cap_id == p_cap_id) {
         dout(" remove export cap %p mds%d flags %d\n",
-             cap, peer, ph->flags);
+             ocap, peer, ph->flags);
         if ((ph->flags & CEPH_CAP_FLAG_AUTH) &&
-            (cap->seq != le32_to_cpu(ph->seq) ||
-             cap->mseq != le32_to_cpu(ph->mseq))) {
+            (ocap->seq != le32_to_cpu(ph->seq) ||
+             ocap->mseq != le32_to_cpu(ph->mseq))) {
             pr_err("handle_cap_import: mismatched seq/mseq: "
                    "ino (%llx.%llx) mds%d seq %d mseq %d "
                    "importer mds%d has peer seq %d mseq %d\n",
-                   ceph_vinop(inode), peer, cap->seq,
-                   cap->mseq, mds, le32_to_cpu(ph->seq),
+                   ceph_vinop(inode), peer, ocap->seq,
+                   ocap->mseq, mds, le32_to_cpu(ph->seq),
                    le32_to_cpu(ph->mseq));
         }
-        ci->i_cap_exporting_issued = cap->issued;
-        __ceph_remove_cap(cap, (ph->flags & CEPH_CAP_FLAG_RELEASE));
+        __ceph_remove_cap(ocap, (ph->flags & CEPH_CAP_FLAG_RELEASE));
     }
 
     /* make sure we re-request max_size, if necessary */
     ci->i_wanted_max_size = 0;
     ci->i_requested_max_size = 0;
-    spin_unlock(&ci->i_ceph_lock);
-
-    down_write(&mdsc->snap_rwsem);
-    ceph_update_snap_trace(mdsc, snaptrace, snaptrace+snaptrace_len,
-                           false);
-    downgrade_write(&mdsc->snap_rwsem);
-    ceph_add_cap(inode, session, cap_id, -1,
-                 issued, wanted, seq, mseq, realmino, CEPH_CAP_FLAG_AUTH,
-                 NULL /* no caps context */);
-    kick_flushing_inode_caps(mdsc, session, inode);
-    up_read(&mdsc->snap_rwsem);
+
+    *old_issued = issued;
+    *target_cap = cap;
 }
 
 /*
@@ -2977,7 +2990,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
     struct ceph_mds_caps *h;
     struct ceph_mds_cap_peer *peer = NULL;
     int mds = session->s_mds;
-    int op;
+    int op, issued;
     u32 seq, mseq;
     struct ceph_vino vino;
     u64 cap_id;
@@ -3069,7 +3082,10 @@ void ceph_handle_caps(struct ceph_mds_session *session,
 
     case CEPH_CAP_OP_IMPORT:
         handle_cap_import(mdsc, inode, h, peer, session,
-                          snaptrace, snaptrace_len);
+                          &cap, &issued);
+        handle_cap_grant(mdsc, inode, h, snaptrace, snaptrace_len,
+                         msg->middle, session, cap, issued);
+        goto done_unlocked;
     }
 
     /* the rest require a cap */
@@ -3086,8 +3102,10 @@ void ceph_handle_caps(struct ceph_mds_session *session,
     switch (op) {
     case CEPH_CAP_OP_REVOKE:
     case CEPH_CAP_OP_GRANT:
-    case CEPH_CAP_OP_IMPORT:
-        handle_cap_grant(inode, h, session, cap, msg->middle);
+        __ceph_caps_issued(ci, &issued);
+        issued |= __ceph_caps_dirty(ci);
+        handle_cap_grant(mdsc, inode, h, NULL, 0, msg->middle,
+                         session, cap, issued);
         goto done_unlocked;
 
     case CEPH_CAP_OP_FLUSH_ACK:
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index 00d6af6a32ec..8d7d782f4382 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -169,7 +169,7 @@ static struct dentry *__get_parent(struct super_block *sb,
     return dentry;
 }
 
-struct dentry *ceph_get_parent(struct dentry *child)
+static struct dentry *ceph_get_parent(struct dentry *child)
 {
     /* don't re-export snaps */
     if (ceph_snap(child->d_inode) != CEPH_NOSNAP)
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index e4fff9ff1c27..04c89c266cec 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -10,6 +10,7 @@
 #include <linux/writeback.h>
 #include <linux/vmalloc.h>
 #include <linux/posix_acl.h>
+#include <linux/random.h>
 
 #include "super.h"
 #include "mds_client.h"
@@ -179,9 +180,8 @@ struct ceph_inode_frag *__ceph_find_frag(struct ceph_inode_info *ci, u32 f)
  * specified, copy the frag delegation info to the caller if
  * it is present.
  */
-u32 ceph_choose_frag(struct ceph_inode_info *ci, u32 v,
-                     struct ceph_inode_frag *pfrag,
-                     int *found)
+static u32 __ceph_choose_frag(struct ceph_inode_info *ci, u32 v,
+                              struct ceph_inode_frag *pfrag, int *found)
 {
     u32 t = ceph_frag_make(0, 0);
     struct ceph_inode_frag *frag;
@@ -191,7 +191,6 @@ u32 ceph_choose_frag(struct ceph_inode_info *ci, u32 v,
     if (found)
         *found = 0;
 
-    mutex_lock(&ci->i_fragtree_mutex);
     while (1) {
         WARN_ON(!ceph_frag_contains_value(t, v));
         frag = __ceph_find_frag(ci, t);
@@ -220,10 +219,19 @@ u32 ceph_choose_frag(struct ceph_inode_info *ci, u32 v,
     }
     dout("choose_frag(%x) = %x\n", v, t);
 
-    mutex_unlock(&ci->i_fragtree_mutex);
     return t;
 }
 
+u32 ceph_choose_frag(struct ceph_inode_info *ci, u32 v,
+                     struct ceph_inode_frag *pfrag, int *found)
+{
+    u32 ret;
+    mutex_lock(&ci->i_fragtree_mutex);
+    ret = __ceph_choose_frag(ci, v, pfrag, found);
+    mutex_unlock(&ci->i_fragtree_mutex);
+    return ret;
+}
+
 /*
  * Process dirfrag (delegation) info from the mds.  Include leaf
  * fragment in tree ONLY if ndist > 0.  Otherwise, only
@@ -237,11 +245,17 @@ static int ceph_fill_dirfrag(struct inode *inode,
     u32 id = le32_to_cpu(dirinfo->frag);
     int mds = le32_to_cpu(dirinfo->auth);
     int ndist = le32_to_cpu(dirinfo->ndist);
+    int diri_auth = -1;
     int i;
     int err = 0;
 
+    spin_lock(&ci->i_ceph_lock);
+    if (ci->i_auth_cap)
+        diri_auth = ci->i_auth_cap->mds;
+    spin_unlock(&ci->i_ceph_lock);
+
     mutex_lock(&ci->i_fragtree_mutex);
-    if (ndist == 0) {
+    if (ndist == 0 && mds == diri_auth) {
         /* no delegation info needed. */
         frag = __ceph_find_frag(ci, id);
         if (!frag)
| @@ -286,6 +300,75 @@ out: | |||
| 286 | return err; | 300 | return err; |
| 287 | } | 301 | } |
| 288 | 302 | ||
| 303 | static int ceph_fill_fragtree(struct inode *inode, | ||
| 304 | struct ceph_frag_tree_head *fragtree, | ||
| 305 | struct ceph_mds_reply_dirfrag *dirinfo) | ||
| 306 | { | ||
| 307 | struct ceph_inode_info *ci = ceph_inode(inode); | ||
| 308 | struct ceph_inode_frag *frag; | ||
| 309 | struct rb_node *rb_node; | ||
| 310 | int i; | ||
| 311 | u32 id, nsplits; | ||
| 312 | bool update = false; | ||
| 313 | |||
| 314 | mutex_lock(&ci->i_fragtree_mutex); | ||
| 315 | nsplits = le32_to_cpu(fragtree->nsplits); | ||
| 316 | if (nsplits) { | ||
| 317 | i = prandom_u32() % nsplits; | ||
| 318 | id = le32_to_cpu(fragtree->splits[i].frag); | ||
| 319 | if (!__ceph_find_frag(ci, id)) | ||
| 320 | update = true; | ||
| 321 | } else if (!RB_EMPTY_ROOT(&ci->i_fragtree)) { | ||
| 322 | rb_node = rb_first(&ci->i_fragtree); | ||
| 323 | frag = rb_entry(rb_node, struct ceph_inode_frag, node); | ||
| 324 | if (frag->frag != ceph_frag_make(0, 0) || rb_next(rb_node)) | ||
| 325 | update = true; | ||
| 326 | } | ||
| 327 | if (!update && dirinfo) { | ||
| 328 | id = le32_to_cpu(dirinfo->frag); | ||
| 329 | if (id != __ceph_choose_frag(ci, id, NULL, NULL)) | ||
| 330 | update = true; | ||
| 331 | } | ||
| 332 | if (!update) | ||
| 333 | goto out_unlock; | ||
| 334 | |||
| 335 | dout("fill_fragtree %llx.%llx\n", ceph_vinop(inode)); | ||
| 336 | rb_node = rb_first(&ci->i_fragtree); | ||
| 337 | for (i = 0; i < nsplits; i++) { | ||
| 338 | id = le32_to_cpu(fragtree->splits[i].frag); | ||
| 339 | frag = NULL; | ||
| 340 | while (rb_node) { | ||
| 341 | frag = rb_entry(rb_node, struct ceph_inode_frag, node); | ||
| 342 | if (ceph_frag_compare(frag->frag, id) >= 0) { | ||
| 343 | if (frag->frag != id) | ||
| 344 | frag = NULL; | ||
| 345 | else | ||
| 346 | rb_node = rb_next(rb_node); | ||
| 347 | break; | ||
| 348 | } | ||
| 349 | rb_node = rb_next(rb_node); | ||
| 350 | rb_erase(&frag->node, &ci->i_fragtree); | ||
| 351 | kfree(frag); | ||
| 352 | frag = NULL; | ||
| 353 | } | ||
| 354 | if (!frag) { | ||
| 355 | frag = __get_or_create_frag(ci, id); | ||
| 356 | if (IS_ERR(frag)) | ||
| 357 | continue; | ||
| 358 | } | ||
| 359 | frag->split_by = le32_to_cpu(fragtree->splits[i].by); | ||
| 360 | dout(" frag %x split by %d\n", frag->frag, frag->split_by); | ||
| 361 | } | ||
| 362 | while (rb_node) { | ||
| 363 | frag = rb_entry(rb_node, struct ceph_inode_frag, node); | ||
| 364 | rb_node = rb_next(rb_node); | ||
| 365 | rb_erase(&frag->node, &ci->i_fragtree); | ||
| 366 | kfree(frag); | ||
| 367 | } | ||
| 368 | out_unlock: | ||
| 369 | mutex_unlock(&ci->i_fragtree_mutex); | ||
| 370 | return 0; | ||
| 371 | } | ||
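ceph_fill_fragtree reconciles the cached rbtree against the server's split list in one ordered pass, assuming the splits arrive sorted by fragment id: stale nodes are erased, matching ones updated, missing ones created. The same merge pass over a sorted singly-linked list, as a compilable userspace sketch (names invented):

    #include <stdlib.h>

    struct node {
        unsigned int key;       /* sorted ascending along ->next */
        int val;
        struct node *next;
    };

    /*
     * One ordered pass, mirroring the rbtree walk above: keys absent
     * from the update array are dropped, matches are updated in place,
     * and new keys are inserted at their sorted position.
     */
    static void reconcile(struct node **head,
                          const unsigned int *keys, const int *vals, int n)
    {
        struct node **link = head;

        for (int i = 0; i < n; i++) {
            /* erase stale nodes that sort before the next wanted key */
            while (*link && (*link)->key < keys[i]) {
                struct node *dead = *link;
                *link = dead->next;
                free(dead);
            }
            if (*link && (*link)->key == keys[i]) {
                (*link)->val = vals[i];
            } else {
                struct node *fresh = malloc(sizeof(*fresh));
                if (!fresh)
                    continue;   /* best effort, like the IS_ERR continue */
                fresh->key = keys[i];
                fresh->val = vals[i];
                fresh->next = *link;
                *link = fresh;
            }
            link = &(*link)->next;
        }
        while (*link) {         /* everything past the last key is stale */
            struct node *dead = *link;
            *link = dead->next;
            free(dead);
        }
    }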
| 289 | 372 | ||
| 290 | /* | 373 | /* |
| 291 | * initialize a newly allocated inode. | 374 | * initialize a newly allocated inode. |
| @@ -341,7 +424,6 @@ struct inode *ceph_alloc_inode(struct super_block *sb) | |||
| 341 | INIT_LIST_HEAD(&ci->i_cap_snaps); | 424 | INIT_LIST_HEAD(&ci->i_cap_snaps); |
| 342 | ci->i_head_snapc = NULL; | 425 | ci->i_head_snapc = NULL; |
| 343 | ci->i_snap_caps = 0; | 426 | ci->i_snap_caps = 0; |
| 344 | ci->i_cap_exporting_issued = 0; | ||
| 345 | 427 | ||
| 346 | for (i = 0; i < CEPH_FILE_MODE_NUM; i++) | 428 | for (i = 0; i < CEPH_FILE_MODE_NUM; i++) |
| 347 | ci->i_nr_by_mode[i] = 0; | 429 | ci->i_nr_by_mode[i] = 0; |
| @@ -407,7 +489,7 @@ void ceph_destroy_inode(struct inode *inode) | |||
| 407 | 489 | ||
| 408 | /* | 490 | /* |
| 409 | * we may still have a snap_realm reference if there are stray | 491 | * we may still have a snap_realm reference if there are stray |
| 410 | * caps in i_cap_exporting_issued or i_snap_caps. | 492 | * caps in i_snap_caps. |
| 411 | */ | 493 | */ |
| 412 | if (ci->i_snap_realm) { | 494 | if (ci->i_snap_realm) { |
| 413 | struct ceph_mds_client *mdsc = | 495 | struct ceph_mds_client *mdsc = |
| @@ -582,22 +664,26 @@ static int fill_inode(struct inode *inode, | |||
| 582 | unsigned long ttl_from, int cap_fmode, | 664 | unsigned long ttl_from, int cap_fmode, |
| 583 | struct ceph_cap_reservation *caps_reservation) | 665 | struct ceph_cap_reservation *caps_reservation) |
| 584 | { | 666 | { |
| 667 | struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; | ||
| 585 | struct ceph_mds_reply_inode *info = iinfo->in; | 668 | struct ceph_mds_reply_inode *info = iinfo->in; |
| 586 | struct ceph_inode_info *ci = ceph_inode(inode); | 669 | struct ceph_inode_info *ci = ceph_inode(inode); |
| 587 | int i; | 670 | int issued = 0, implemented, new_issued; |
| 588 | int issued = 0, implemented; | ||
| 589 | struct timespec mtime, atime, ctime; | 671 | struct timespec mtime, atime, ctime; |
| 590 | u32 nsplits; | ||
| 591 | struct ceph_inode_frag *frag; | ||
| 592 | struct rb_node *rb_node; | ||
| 593 | struct ceph_buffer *xattr_blob = NULL; | 672 | struct ceph_buffer *xattr_blob = NULL; |
| 673 | struct ceph_cap *new_cap = NULL; | ||
| 594 | int err = 0; | 674 | int err = 0; |
| 595 | int queue_trunc = 0; | 675 | bool wake = false; |
| 676 | bool queue_trunc = false; | ||
| 677 | bool new_version = false; | ||
| 596 | 678 | ||
| 597 | dout("fill_inode %p ino %llx.%llx v %llu had %llu\n", | 679 | dout("fill_inode %p ino %llx.%llx v %llu had %llu\n", |
| 598 | inode, ceph_vinop(inode), le64_to_cpu(info->version), | 680 | inode, ceph_vinop(inode), le64_to_cpu(info->version), |
| 599 | ci->i_version); | 681 | ci->i_version); |
| 600 | 682 | ||
| 683 | /* prealloc new cap struct */ | ||
| 684 | if (info->cap.caps && ceph_snap(inode) == CEPH_NOSNAP) | ||
| 685 | new_cap = ceph_get_cap(mdsc, caps_reservation); | ||
| 686 | |||
| 601 | /* | 687 | /* |
| 602 | * prealloc xattr data, if it looks like we'll need it. only | 688 | * prealloc xattr data, if it looks like we'll need it. only |
| 603 | * if len > 4 (meaning there are actually xattrs; the first 4 | 689 | * if len > 4 (meaning there are actually xattrs; the first 4 |
| @@ -623,19 +709,23 @@ static int fill_inode(struct inode *inode, | |||
| 623 | * 3 2 skip | 709 | * 3 2 skip |
| 624 | * 3 3 update | 710 | * 3 3 update |
| 625 | */ | 711 | */ |
| 626 | if (le64_to_cpu(info->version) > 0 && | 712 | if (ci->i_version == 0 || |
| 627 | (ci->i_version & ~1) >= le64_to_cpu(info->version)) | 713 | ((info->cap.flags & CEPH_CAP_FLAG_AUTH) && |
| 628 | goto no_change; | 714 | le64_to_cpu(info->version) > (ci->i_version & ~1))) |
| 629 | 715 | new_version = true; | |
| 716 | |||
| 630 | issued = __ceph_caps_issued(ci, &implemented); | 717 | issued = __ceph_caps_issued(ci, &implemented); |
| 631 | issued |= implemented | __ceph_caps_dirty(ci); | 718 | issued |= implemented | __ceph_caps_dirty(ci); |
| 719 | new_issued = ~issued & le32_to_cpu(info->cap.caps); | ||
| 632 | 720 | ||
| 633 | /* update inode */ | 721 | /* update inode */ |
| 634 | ci->i_version = le64_to_cpu(info->version); | 722 | ci->i_version = le64_to_cpu(info->version); |
| 635 | inode->i_version++; | 723 | inode->i_version++; |
| 636 | inode->i_rdev = le32_to_cpu(info->rdev); | 724 | inode->i_rdev = le32_to_cpu(info->rdev); |
| 725 | inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1; | ||
| 637 | 726 | ||
| 638 | if ((issued & CEPH_CAP_AUTH_EXCL) == 0) { | 727 | if ((new_version || (new_issued & CEPH_CAP_AUTH_SHARED)) && |
| 728 | (issued & CEPH_CAP_AUTH_EXCL) == 0) { | ||
| 639 | inode->i_mode = le32_to_cpu(info->mode); | 729 | inode->i_mode = le32_to_cpu(info->mode); |
| 640 | inode->i_uid = make_kuid(&init_user_ns, le32_to_cpu(info->uid)); | 730 | inode->i_uid = make_kuid(&init_user_ns, le32_to_cpu(info->uid)); |
| 641 | inode->i_gid = make_kgid(&init_user_ns, le32_to_cpu(info->gid)); | 731 | inode->i_gid = make_kgid(&init_user_ns, le32_to_cpu(info->gid)); |
| @@ -644,23 +734,35 @@ static int fill_inode(struct inode *inode, | |||
| 644 | from_kgid(&init_user_ns, inode->i_gid)); | 734 | from_kgid(&init_user_ns, inode->i_gid)); |
| 645 | } | 735 | } |
| 646 | 736 | ||
| 647 | if ((issued & CEPH_CAP_LINK_EXCL) == 0) | 737 | if ((new_version || (new_issued & CEPH_CAP_LINK_SHARED)) && |
| 738 | (issued & CEPH_CAP_LINK_EXCL) == 0) | ||
| 648 | set_nlink(inode, le32_to_cpu(info->nlink)); | 739 | set_nlink(inode, le32_to_cpu(info->nlink)); |
| 649 | 740 | ||
| 650 | /* be careful with mtime, atime, size */ | 741 | if (new_version || (new_issued & CEPH_CAP_ANY_RD)) { |
| 651 | ceph_decode_timespec(&atime, &info->atime); | 742 | /* be careful with mtime, atime, size */ |
| 652 | ceph_decode_timespec(&mtime, &info->mtime); | 743 | ceph_decode_timespec(&atime, &info->atime); |
| 653 | ceph_decode_timespec(&ctime, &info->ctime); | 744 | ceph_decode_timespec(&mtime, &info->mtime); |
| 654 | queue_trunc = ceph_fill_file_size(inode, issued, | 745 | ceph_decode_timespec(&ctime, &info->ctime); |
| 655 | le32_to_cpu(info->truncate_seq), | 746 | ceph_fill_file_time(inode, issued, |
| 656 | le64_to_cpu(info->truncate_size), | 747 | le32_to_cpu(info->time_warp_seq), |
| 657 | le64_to_cpu(info->size)); | 748 | &ctime, &mtime, &atime); |
| 658 | ceph_fill_file_time(inode, issued, | 749 | } |
| 659 | le32_to_cpu(info->time_warp_seq), | 750 | |
| 660 | &ctime, &mtime, &atime); | 751 | if (new_version || |
| 661 | 752 | (new_issued & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR))) { | |
| 662 | ci->i_layout = info->layout; | 753 | ci->i_layout = info->layout; |
| 663 | inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1; | 754 | queue_trunc = ceph_fill_file_size(inode, issued, |
| 755 | le32_to_cpu(info->truncate_seq), | ||
| 756 | le64_to_cpu(info->truncate_size), | ||
| 757 | le64_to_cpu(info->size)); | ||
| 758 | /* only update max_size on auth cap */ | ||
| 759 | if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) && | ||
| 760 | ci->i_max_size != le64_to_cpu(info->max_size)) { | ||
| 761 | dout("max_size %lld -> %llu\n", ci->i_max_size, | ||
| 762 | le64_to_cpu(info->max_size)); | ||
| 763 | ci->i_max_size = le64_to_cpu(info->max_size); | ||
| 764 | } | ||
| 765 | } | ||
| 664 | 766 | ||
| 665 | /* xattrs */ | 767 | /* xattrs */ |
| 666 | /* note that if i_xattrs.len <= 4, i_xattrs.data will still be NULL. */ | 768 | /* note that if i_xattrs.len <= 4, i_xattrs.data will still be NULL. */ |
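The rewritten update logic above gates each group of inode fields on new_issued = ~issued & info->cap.caps, refreshing a field only when its governing cap is newly granted or the reply carries a newer authoritative version. A minimal sketch of that gating, with invented cap bit values:

    #include <stdbool.h>
    #include <stdint.h>

    /* invented bit values, purely for illustration */
    #define CAP_AUTH_SHARED  0x01u
    #define CAP_LINK_SHARED  0x02u

    struct reply  { uint32_t caps, mode, nlink; };
    struct icache { uint32_t issued, mode, nlink; };

    static void apply_reply(struct icache *ic, const struct reply *r,
                            bool new_version)
    {
        /* bits the server grants now that we did not already hold */
        uint32_t new_issued = ~ic->issued & r->caps;

        /* refresh a field group only when its cap is newly issued or
         * the reply is authoritatively newer; otherwise the cached
         * copy may be more recent than the reply's snapshot */
        if (new_version || (new_issued & CAP_AUTH_SHARED))
            ic->mode = r->mode;
        if (new_version || (new_issued & CAP_LINK_SHARED))
            ic->nlink = r->nlink;
    }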
| @@ -745,58 +847,6 @@ static int fill_inode(struct inode *inode, | |||
| 745 | dout(" marking %p complete (empty)\n", inode); | 847 | dout(" marking %p complete (empty)\n", inode); |
| 746 | __ceph_dir_set_complete(ci, atomic_read(&ci->i_release_count)); | 848 | __ceph_dir_set_complete(ci, atomic_read(&ci->i_release_count)); |
| 747 | } | 849 | } |
| 748 | no_change: | ||
| 749 | /* only update max_size on auth cap */ | ||
| 750 | if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) && | ||
| 751 | ci->i_max_size != le64_to_cpu(info->max_size)) { | ||
| 752 | dout("max_size %lld -> %llu\n", ci->i_max_size, | ||
| 753 | le64_to_cpu(info->max_size)); | ||
| 754 | ci->i_max_size = le64_to_cpu(info->max_size); | ||
| 755 | } | ||
| 756 | |||
| 757 | spin_unlock(&ci->i_ceph_lock); | ||
| 758 | |||
| 759 | /* queue truncate if we saw i_size decrease */ | ||
| 760 | if (queue_trunc) | ||
| 761 | ceph_queue_vmtruncate(inode); | ||
| 762 | |||
| 763 | /* populate frag tree */ | ||
| 764 | /* FIXME: move me up, if/when version reflects fragtree changes */ | ||
| 765 | nsplits = le32_to_cpu(info->fragtree.nsplits); | ||
| 766 | mutex_lock(&ci->i_fragtree_mutex); | ||
| 767 | rb_node = rb_first(&ci->i_fragtree); | ||
| 768 | for (i = 0; i < nsplits; i++) { | ||
| 769 | u32 id = le32_to_cpu(info->fragtree.splits[i].frag); | ||
| 770 | frag = NULL; | ||
| 771 | while (rb_node) { | ||
| 772 | frag = rb_entry(rb_node, struct ceph_inode_frag, node); | ||
| 773 | if (ceph_frag_compare(frag->frag, id) >= 0) { | ||
| 774 | if (frag->frag != id) | ||
| 775 | frag = NULL; | ||
| 776 | else | ||
| 777 | rb_node = rb_next(rb_node); | ||
| 778 | break; | ||
| 779 | } | ||
| 780 | rb_node = rb_next(rb_node); | ||
| 781 | rb_erase(&frag->node, &ci->i_fragtree); | ||
| 782 | kfree(frag); | ||
| 783 | frag = NULL; | ||
| 784 | } | ||
| 785 | if (!frag) { | ||
| 786 | frag = __get_or_create_frag(ci, id); | ||
| 787 | if (IS_ERR(frag)) | ||
| 788 | continue; | ||
| 789 | } | ||
| 790 | frag->split_by = le32_to_cpu(info->fragtree.splits[i].by); | ||
| 791 | dout(" frag %x split by %d\n", frag->frag, frag->split_by); | ||
| 792 | } | ||
| 793 | while (rb_node) { | ||
| 794 | frag = rb_entry(rb_node, struct ceph_inode_frag, node); | ||
| 795 | rb_node = rb_next(rb_node); | ||
| 796 | rb_erase(&frag->node, &ci->i_fragtree); | ||
| 797 | kfree(frag); | ||
| 798 | } | ||
| 799 | mutex_unlock(&ci->i_fragtree_mutex); | ||
| 800 | 850 | ||
| 801 | /* were we issued a capability? */ | 851 | /* were we issued a capability? */ |
| 802 | if (info->cap.caps) { | 852 | if (info->cap.caps) { |
| @@ -809,30 +859,41 @@ no_change: | |||
| 809 | le32_to_cpu(info->cap.seq), | 859 | le32_to_cpu(info->cap.seq), |
| 810 | le32_to_cpu(info->cap.mseq), | 860 | le32_to_cpu(info->cap.mseq), |
| 811 | le64_to_cpu(info->cap.realm), | 861 | le64_to_cpu(info->cap.realm), |
| 812 | info->cap.flags, | 862 | info->cap.flags, &new_cap); |
| 813 | caps_reservation); | 863 | wake = true; |
| 814 | } else { | 864 | } else { |
| 815 | spin_lock(&ci->i_ceph_lock); | ||
| 816 | dout(" %p got snap_caps %s\n", inode, | 865 | dout(" %p got snap_caps %s\n", inode, |
| 817 | ceph_cap_string(le32_to_cpu(info->cap.caps))); | 866 | ceph_cap_string(le32_to_cpu(info->cap.caps))); |
| 818 | ci->i_snap_caps |= le32_to_cpu(info->cap.caps); | 867 | ci->i_snap_caps |= le32_to_cpu(info->cap.caps); |
| 819 | if (cap_fmode >= 0) | 868 | if (cap_fmode >= 0) |
| 820 | __ceph_get_fmode(ci, cap_fmode); | 869 | __ceph_get_fmode(ci, cap_fmode); |
| 821 | spin_unlock(&ci->i_ceph_lock); | ||
| 822 | } | 870 | } |
| 823 | } else if (cap_fmode >= 0) { | 871 | } else if (cap_fmode >= 0) { |
| 824 | pr_warn("mds issued no caps on %llx.%llx\n", | 872 | pr_warn("mds issued no caps on %llx.%llx\n", |
| 825 | ceph_vinop(inode)); | 873 | ceph_vinop(inode)); |
| 826 | __ceph_get_fmode(ci, cap_fmode); | 874 | __ceph_get_fmode(ci, cap_fmode); |
| 827 | } | 875 | } |
| 876 | spin_unlock(&ci->i_ceph_lock); | ||
| 877 | |||
| 878 | if (wake) | ||
| 879 | wake_up_all(&ci->i_cap_wq); | ||
| 880 | |||
| 881 | /* queue truncate if we saw i_size decrease */ | ||
| 882 | if (queue_trunc) | ||
| 883 | ceph_queue_vmtruncate(inode); | ||
| 884 | |||
| 885 | /* populate frag tree */ | ||
| 886 | if (S_ISDIR(inode->i_mode)) | ||
| 887 | ceph_fill_fragtree(inode, &info->fragtree, dirinfo); | ||
| 828 | 888 | ||
| 829 | /* update delegation info? */ | 889 | /* update delegation info? */ |
| 830 | if (dirinfo) | 890 | if (dirinfo) |
| 831 | ceph_fill_dirfrag(inode, dirinfo); | 891 | ceph_fill_dirfrag(inode, dirinfo); |
| 832 | 892 | ||
| 833 | err = 0; | 893 | err = 0; |
| 834 | |||
| 835 | out: | 894 | out: |
| 895 | if (new_cap) | ||
| 896 | ceph_put_cap(mdsc, new_cap); | ||
| 836 | if (xattr_blob) | 897 | if (xattr_blob) |
| 837 | ceph_buffer_put(xattr_blob); | 898 | ceph_buffer_put(xattr_blob); |
| 838 | return err; | 899 | return err; |
| @@ -1485,7 +1546,7 @@ static void ceph_invalidate_work(struct work_struct *work) | |||
| 1485 | orig_gen = ci->i_rdcache_gen; | 1546 | orig_gen = ci->i_rdcache_gen; |
| 1486 | spin_unlock(&ci->i_ceph_lock); | 1547 | spin_unlock(&ci->i_ceph_lock); |
| 1487 | 1548 | ||
| 1488 | truncate_inode_pages(inode->i_mapping, 0); | 1549 | truncate_pagecache(inode, 0); |
| 1489 | 1550 | ||
| 1490 | spin_lock(&ci->i_ceph_lock); | 1551 | spin_lock(&ci->i_ceph_lock); |
| 1491 | if (orig_gen == ci->i_rdcache_gen && | 1552 | if (orig_gen == ci->i_rdcache_gen && |
| @@ -1588,7 +1649,7 @@ retry: | |||
| 1588 | ci->i_truncate_pending, to); | 1649 | ci->i_truncate_pending, to); |
| 1589 | spin_unlock(&ci->i_ceph_lock); | 1650 | spin_unlock(&ci->i_ceph_lock); |
| 1590 | 1651 | ||
| 1591 | truncate_inode_pages(inode->i_mapping, to); | 1652 | truncate_pagecache(inode, to); |
| 1592 | 1653 | ||
| 1593 | spin_lock(&ci->i_ceph_lock); | 1654 | spin_lock(&ci->i_ceph_lock); |
| 1594 | if (to == ci->i_truncate_size) { | 1655 | if (to == ci->i_truncate_size) { |
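Both truncate hunks, like ceph_invalidate_work just above them, rely on the sample/drop/recheck idiom: record a generation counter under the lock, drop the lock for the blocking page-cache work, then retake it and verify nothing raced in. A userspace rendering of that idiom (a pthread mutex in place of i_ceph_lock; names invented):

    #include <pthread.h>
    #include <stdbool.h>

    struct pagecache {
        pthread_mutex_t lock;   /* plays the role of ci->i_ceph_lock */
        unsigned long gen;      /* bumped whenever pages may be re-added */
    };

    static void drop_all_pages(struct pagecache *pc)
    {
        (void)pc;               /* blocking work; must run unlocked */
    }

    /* returns true if the invalidation stuck, false if a racer won */
    static bool invalidate(struct pagecache *pc)
    {
        unsigned long orig_gen;
        bool ok;

        pthread_mutex_lock(&pc->lock);
        orig_gen = pc->gen;     /* sample the generation under the lock */
        pthread_mutex_unlock(&pc->lock);

        drop_all_pages(pc);

        pthread_mutex_lock(&pc->lock);
        ok = (orig_gen == pc->gen);  /* did anything race in meanwhile? */
        pthread_mutex_unlock(&pc->lock);
        return ok;              /* caller re-queues the work on false */
    }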
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 9a33b98cb000..92a2548278fc 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
| @@ -1558,6 +1558,8 @@ ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode) | |||
| 1558 | init_completion(&req->r_safe_completion); | 1558 | init_completion(&req->r_safe_completion); |
| 1559 | INIT_LIST_HEAD(&req->r_unsafe_item); | 1559 | INIT_LIST_HEAD(&req->r_unsafe_item); |
| 1560 | 1560 | ||
| 1561 | req->r_stamp = CURRENT_TIME; | ||
| 1562 | |||
| 1561 | req->r_op = op; | 1563 | req->r_op = op; |
| 1562 | req->r_direct_mode = mode; | 1564 | req->r_direct_mode = mode; |
| 1563 | return req; | 1565 | return req; |
| @@ -1783,7 +1785,8 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, | |||
| 1783 | } | 1785 | } |
| 1784 | 1786 | ||
| 1785 | len = sizeof(*head) + | 1787 | len = sizeof(*head) + |
| 1786 | pathlen1 + pathlen2 + 2*(1 + sizeof(u32) + sizeof(u64)); | 1788 | pathlen1 + pathlen2 + 2*(1 + sizeof(u32) + sizeof(u64)) + |
| 1789 | sizeof(struct timespec); | ||
| 1787 | 1790 | ||
| 1788 | /* calculate (max) length for cap releases */ | 1791 | /* calculate (max) length for cap releases */ |
| 1789 | len += sizeof(struct ceph_mds_request_release) * | 1792 | len += sizeof(struct ceph_mds_request_release) * |
| @@ -1800,6 +1803,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, | |||
| 1800 | goto out_free2; | 1803 | goto out_free2; |
| 1801 | } | 1804 | } |
| 1802 | 1805 | ||
| 1806 | msg->hdr.version = 2; | ||
| 1803 | msg->hdr.tid = cpu_to_le64(req->r_tid); | 1807 | msg->hdr.tid = cpu_to_le64(req->r_tid); |
| 1804 | 1808 | ||
| 1805 | head = msg->front.iov_base; | 1809 | head = msg->front.iov_base; |
| @@ -1836,6 +1840,9 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, | |||
| 1836 | mds, req->r_old_inode_drop, req->r_old_inode_unless, 0); | 1840 | mds, req->r_old_inode_drop, req->r_old_inode_unless, 0); |
| 1837 | head->num_releases = cpu_to_le16(releases); | 1841 | head->num_releases = cpu_to_le16(releases); |
| 1838 | 1842 | ||
| 1843 | /* time stamp */ | ||
| 1844 | ceph_encode_copy(&p, &req->r_stamp, sizeof(req->r_stamp)); | ||
| 1845 | |||
| 1839 | BUG_ON(p > end); | 1846 | BUG_ON(p > end); |
| 1840 | msg->front.iov_len = p - msg->front.iov_base; | 1847 | msg->front.iov_len = p - msg->front.iov_base; |
| 1841 | msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); | 1848 | msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); |
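The mds_client.c hunks extend the request wire format the conservative way: the new timestamp is appended after all existing fields, the front-length budget grows by sizeof(struct timespec), and hdr.version is bumped so decoders can tell whether the trailing bytes are present. A rough userspace sketch of that append-and-version pattern (buffer handling simplified, bounds checks omitted, all names invented):

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>
    #include <time.h>

    struct msg {
        uint8_t version;        /* stands in for msg->hdr.version */
        uint8_t buf[256];
        size_t len;             /* stands in for front.iov_len */
    };

    /* append raw bytes, loosely mirroring ceph_encode_copy() */
    static void encode_copy(struct msg *m, const void *p, size_t n)
    {
        memcpy(m->buf + m->len, p, n);
        m->len += n;
    }

    static void build_request(struct msg *m, const struct timespec *stamp)
    {
        m->len = 0;
        /* ... head, paths and cap releases would be encoded here ... */

        /* new field goes strictly at the end: version-1 decoders simply
         * stop reading early, version-2 decoders know to look for it */
        encode_copy(m, stamp, sizeof(*stamp));
        m->version = 2;
    }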
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index e90cfccf93bd..e00737cf523c 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
| @@ -194,6 +194,7 @@ struct ceph_mds_request { | |||
| 194 | int r_fmode; /* file mode, if expecting cap */ | 194 | int r_fmode; /* file mode, if expecting cap */ |
| 195 | kuid_t r_uid; | 195 | kuid_t r_uid; |
| 196 | kgid_t r_gid; | 196 | kgid_t r_gid; |
| 197 | struct timespec r_stamp; | ||
| 197 | 198 | ||
| 198 | /* for choosing which mds to send this request to */ | 199 | /* for choosing which mds to send this request to */ |
| 199 | int r_direct_mode; | 200 | int r_direct_mode; |
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index ead05cc1f447..12b20744e386 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
| @@ -292,7 +292,6 @@ struct ceph_inode_info { | |||
| 292 | struct ceph_snap_context *i_head_snapc; /* set if wr_buffer_head > 0 or | 292 | struct ceph_snap_context *i_head_snapc; /* set if wr_buffer_head > 0 or |
| 293 | dirty|flushing caps */ | 293 | dirty|flushing caps */ |
| 294 | unsigned i_snap_caps; /* cap bits for snapped files */ | 294 | unsigned i_snap_caps; /* cap bits for snapped files */ |
| 295 | unsigned i_cap_exporting_issued; | ||
| 296 | 295 | ||
| 297 | int i_nr_by_mode[CEPH_FILE_MODE_NUM]; /* open file counts */ | 296 | int i_nr_by_mode[CEPH_FILE_MODE_NUM]; /* open file counts */ |
| 298 | 297 | ||
| @@ -775,11 +774,13 @@ static inline void ceph_forget_all_cached_acls(struct inode *inode) | |||
| 775 | extern const char *ceph_cap_string(int c); | 774 | extern const char *ceph_cap_string(int c); |
| 776 | extern void ceph_handle_caps(struct ceph_mds_session *session, | 775 | extern void ceph_handle_caps(struct ceph_mds_session *session, |
| 777 | struct ceph_msg *msg); | 776 | struct ceph_msg *msg); |
| 778 | extern int ceph_add_cap(struct inode *inode, | 777 | extern struct ceph_cap *ceph_get_cap(struct ceph_mds_client *mdsc, |
| 779 | struct ceph_mds_session *session, u64 cap_id, | 778 | struct ceph_cap_reservation *ctx); |
| 780 | int fmode, unsigned issued, unsigned wanted, | 779 | extern void ceph_add_cap(struct inode *inode, |
| 781 | unsigned cap, unsigned seq, u64 realmino, int flags, | 780 | struct ceph_mds_session *session, u64 cap_id, |
| 782 | struct ceph_cap_reservation *caps_reservation); | 781 | int fmode, unsigned issued, unsigned wanted, |
| 782 | unsigned cap, unsigned seq, u64 realmino, int flags, | ||
| 783 | struct ceph_cap **new_cap); | ||
| 783 | extern void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release); | 784 | extern void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release); |
| 784 | extern void ceph_put_cap(struct ceph_mds_client *mdsc, | 785 | extern void ceph_put_cap(struct ceph_mds_client *mdsc, |
| 785 | struct ceph_cap *cap); | 786 | struct ceph_cap *cap); |
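The new ceph_get_cap/ceph_add_cap split shown in super.h exists so fill_inode can allocate the cap struct before taking i_ceph_lock, a spinlock under which sleeping allocations are forbidden, and hand ownership in via a struct ceph_cap **. A compilable userspace analogue of that preallocate, consume, release shape (malloc/free and a pthread mutex stand in; names invented):

    #include <stdlib.h>
    #include <pthread.h>

    struct cap { int mds; };

    struct istate {
        pthread_mutex_t lock;   /* plays the role of the i_ceph_lock spinlock */
        struct cap *cap;
    };

    /* consumes *newp only if no cap exists yet; ownership moves by NULLing */
    static void add_cap(struct istate *st, int mds, struct cap **newp)
    {
        if (!st->cap) {
            st->cap = *newp;
            *newp = NULL;
        }
        st->cap->mds = mds;
    }

    static int fill(struct istate *st, int mds)
    {
        /* allocate before locking: no sleeping allocation is allowed
         * while a spinlock is held in the kernel */
        struct cap *new_cap = malloc(sizeof(*new_cap));
        if (!new_cap)
            return -1;

        pthread_mutex_lock(&st->lock);
        add_cap(st, mds, &new_cap);
        pthread_mutex_unlock(&st->lock);

        free(new_cap);          /* no-op if add_cap took ownership */
        return 0;
    }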
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 1e5b45359509..d08e079ea5d3 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
| @@ -617,6 +617,11 @@ static void retry_failed_sctp_send(struct connection *recv_con, | |||
| 617 | int nodeid = sn_send_failed->ssf_info.sinfo_ppid; | 617 | int nodeid = sn_send_failed->ssf_info.sinfo_ppid; |
| 618 | 618 | ||
| 619 | log_print("Retry sending %d bytes to node id %d", len, nodeid); | 619 | log_print("Retry sending %d bytes to node id %d", len, nodeid); |
| 620 | |||
| 621 | if (!nodeid) { | ||
| 622 | log_print("Shouldn't resend data via listening connection."); | ||
| 623 | return; | ||
| 624 | } | ||
| 620 | 625 | ||
| 621 | con = nodeid2con(nodeid, 0); | 626 | con = nodeid2con(nodeid, 0); |
| 622 | if (!con) { | 627 | if (!con) { |
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index b73e0621ce9e..b10b48c2a7af 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
| @@ -910,7 +910,7 @@ static const struct file_operations eventpoll_fops = { | |||
| 910 | void eventpoll_release_file(struct file *file) | 910 | void eventpoll_release_file(struct file *file) |
| 911 | { | 911 | { |
| 912 | struct eventpoll *ep; | 912 | struct eventpoll *ep; |
| 913 | struct epitem *epi; | 913 | struct epitem *epi, *next; |
| 914 | 914 | ||
| 915 | /* | 915 | /* |
| 916 | * We don't want to get "file->f_lock" because it is not | 916 | * We don't want to get "file->f_lock" because it is not |
| @@ -926,7 +926,7 @@ void eventpoll_release_file(struct file *file) | |||
| 926 | * Besides, ep_remove() acquires the lock, so we can't hold it here. | 926 | * Besides, ep_remove() acquires the lock, so we can't hold it here. |
| 927 | */ | 927 | */ |
| 928 | mutex_lock(&epmutex); | 928 | mutex_lock(&epmutex); |
| 929 | list_for_each_entry_rcu(epi, &file->f_ep_links, fllink) { | 929 | list_for_each_entry_safe(epi, next, &file->f_ep_links, fllink) { |
| 930 | ep = epi->ep; | 930 | ep = epi->ep; |
| 931 | mutex_lock_nested(&ep->mtx, 0); | 931 | mutex_lock_nested(&ep->mtx, 0); |
| 932 | ep_remove(ep, epi); | 932 | ep_remove(ep, epi); |
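The eventpoll fix swaps list_for_each_entry_rcu for list_for_each_entry_safe because the loop body calls ep_remove(), which unlinks and frees the current epitem; the _safe variant caches the successor before the body runs. The same idea in miniature, over a plain singly-linked list:

    #include <stdlib.h>

    struct item {
        struct item *next;
    };

    /* the body frees the current node, so its ->next must be read
     * before the body runs -- the essence of the _safe iterator */
    static void release_all(struct item **head)
    {
        struct item *it = *head;
        while (it) {
            struct item *next = it->next;  /* cache successor first */
            free(it);                      /* now 'it' may go away */
            it = next;
        }
        *head = NULL;
    }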
