Diffstat (limited to 'fs')
-rw-r--r--  fs/aio.c                      |  70
-rw-r--r--  fs/btrfs/extent_io.c          |  39
-rw-r--r--  fs/btrfs/extent_io.h          |   3
-rw-r--r--  fs/btrfs/ioctl.c              | 147
-rw-r--r--  fs/btrfs/qgroup.c             |   4
-rw-r--r--  fs/btrfs/reada.c              |   9
-rw-r--r--  fs/btrfs/tests/btrfs-tests.c  |   2
-rw-r--r--  fs/btrfs/tests/qgroup-tests.c |   2
-rw-r--r--  fs/btrfs/transaction.c        |  12
-rw-r--r--  fs/ceph/acl.c                 |   6
-rw-r--r--  fs/ceph/addr.c                |  17
-rw-r--r--  fs/ceph/caps.c                | 246
-rw-r--r--  fs/ceph/export.c              |   2
-rw-r--r--  fs/ceph/inode.c               | 247
-rw-r--r--  fs/ceph/mds_client.c          |   9
-rw-r--r--  fs/ceph/mds_client.h          |   1
-rw-r--r--  fs/ceph/super.h               |  13
-rw-r--r--  fs/dlm/lowcomms.c             |   5
-rw-r--r--  fs/eventpoll.c                |   4
19 files changed, 535 insertions(+), 303 deletions(-)
diff --git a/fs/aio.c b/fs/aio.c
index 56b28607c32d..4f078c054b41 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -477,7 +477,7 @@ void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel)
 }
 EXPORT_SYMBOL(kiocb_set_cancel_fn);
 
-static int kiocb_cancel(struct kioctx *ctx, struct kiocb *kiocb)
+static int kiocb_cancel(struct kiocb *kiocb)
 {
 	kiocb_cancel_fn *old, *cancel;
 
@@ -538,7 +538,7 @@ static void free_ioctx_users(struct percpu_ref *ref)
 				       struct kiocb, ki_list);
 
 		list_del_init(&req->ki_list);
-		kiocb_cancel(ctx, req);
+		kiocb_cancel(req);
 	}
 
 	spin_unlock_irq(&ctx->ctx_lock);
@@ -727,42 +727,42 @@ err:
  * when the processes owning a context have all exited to encourage
  * the rapid destruction of the kioctx.
  */
-static void kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
+static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
 		struct completion *requests_done)
 {
-	if (!atomic_xchg(&ctx->dead, 1)) {
-		struct kioctx_table *table;
-
-		spin_lock(&mm->ioctx_lock);
-		rcu_read_lock();
-		table = rcu_dereference(mm->ioctx_table);
-
-		WARN_ON(ctx != table->table[ctx->id]);
-		table->table[ctx->id] = NULL;
-		rcu_read_unlock();
-		spin_unlock(&mm->ioctx_lock);
-
-		/* percpu_ref_kill() will do the necessary call_rcu() */
-		wake_up_all(&ctx->wait);
-
-		/*
-		 * It'd be more correct to do this in free_ioctx(), after all
-		 * the outstanding kiocbs have finished - but by then io_destroy
-		 * has already returned, so io_setup() could potentially return
-		 * -EAGAIN with no ioctxs actually in use (as far as userspace
-		 * could tell).
-		 */
-		aio_nr_sub(ctx->max_reqs);
-
-		if (ctx->mmap_size)
-			vm_munmap(ctx->mmap_base, ctx->mmap_size);
-
-		ctx->requests_done = requests_done;
-		percpu_ref_kill(&ctx->users);
-	} else {
-		if (requests_done)
-			complete(requests_done);
-	}
+	struct kioctx_table *table;
+
+	if (atomic_xchg(&ctx->dead, 1))
+		return -EINVAL;
+
+
+	spin_lock(&mm->ioctx_lock);
+	rcu_read_lock();
+	table = rcu_dereference(mm->ioctx_table);
+
+	WARN_ON(ctx != table->table[ctx->id]);
+	table->table[ctx->id] = NULL;
+	rcu_read_unlock();
+	spin_unlock(&mm->ioctx_lock);
+
+	/* percpu_ref_kill() will do the necessary call_rcu() */
+	wake_up_all(&ctx->wait);
+
+	/*
+	 * It'd be more correct to do this in free_ioctx(), after all
+	 * the outstanding kiocbs have finished - but by then io_destroy
+	 * has already returned, so io_setup() could potentially return
+	 * -EAGAIN with no ioctxs actually in use (as far as userspace
+	 * could tell).
+	 */
+	aio_nr_sub(ctx->max_reqs);
+
+	if (ctx->mmap_size)
+		vm_munmap(ctx->mmap_base, ctx->mmap_size);
+
+	ctx->requests_done = requests_done;
+	percpu_ref_kill(&ctx->users);
+	return 0;
 }
 
 /* wait_on_sync_kiocb:
@@ -1219,21 +1219,23 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx)
 	if (likely(NULL != ioctx)) {
 		struct completion requests_done =
 			COMPLETION_INITIALIZER_ONSTACK(requests_done);
+		int ret;
 
 		/* Pass requests_done to kill_ioctx() where it can be set
 		 * in a thread-safe way. If we try to set it here then we have
 		 * a race condition if two io_destroy() called simultaneously.
 		 */
-		kill_ioctx(current->mm, ioctx, &requests_done);
+		ret = kill_ioctx(current->mm, ioctx, &requests_done);
 		percpu_ref_put(&ioctx->users);
 
 		/* Wait until all IO for the context are done. Otherwise kernel
 		 * keep using user-space buffers even if user thinks the context
 		 * is destroyed.
 		 */
-		wait_for_completion(&requests_done);
+		if (!ret)
+			wait_for_completion(&requests_done);
 
-		return 0;
+		return ret;
 	}
 	pr_debug("EINVAL: io_destroy: invalid context id\n");
 	return -EINVAL;
@@ -1595,7 +1597,7 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
 
 	kiocb = lookup_kiocb(ctx, iocb, key);
 	if (kiocb)
-		ret = kiocb_cancel(ctx, kiocb);
+		ret = kiocb_cancel(kiocb);
 	else
 		ret = -EINVAL;
 
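
Note on the fs/aio.c change above: making kill_ioctx() return an error turns a race between two io_destroy() calls on the same context into a visible -EINVAL for the loser of the atomic_xchg(), instead of a premature success while requests are still draining. A minimal userspace sketch of the syscall surface involved (illustrative only, not part of the patch; raw syscall wrappers are assumed since glibc does not expose these calls):

/* Illustrative only: exercises io_setup()/io_destroy() via raw syscalls. */
#include <linux/aio_abi.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <errno.h>
#include <stdio.h>

static long sys_io_setup(unsigned nr, aio_context_t *ctx)
{
	return syscall(SYS_io_setup, nr, ctx);
}

static long sys_io_destroy(aio_context_t ctx)
{
	return syscall(SYS_io_destroy, ctx);
}

int main(void)
{
	aio_context_t ctx = 0;

	if (sys_io_setup(32, &ctx) < 0) {
		perror("io_setup");
		return 1;
	}
	/* The first io_destroy() marks the context dead and, with this
	 * patch, waits for in-flight requests to complete. */
	if (sys_io_destroy(ctx) < 0)
		perror("io_destroy");
	/*
	 * A concurrent io_destroy() that loses the atomic_xchg(&ctx->dead, 1)
	 * race now returns -EINVAL immediately instead of 0; a sequential
	 * repeat like this one fails the context lookup, also with EINVAL.
	 */
	if (sys_io_destroy(ctx) < 0 && errno == EINVAL)
		fprintf(stderr, "second io_destroy: EINVAL as expected\n");
	return 0;
}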
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index f25a9092b946..a389820d158b 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2354,7 +2354,7 @@ int end_extent_writepage(struct page *page, int err, u64 start, u64 end)
 {
 	int uptodate = (err == 0);
 	struct extent_io_tree *tree;
-	int ret;
+	int ret = 0;
 
 	tree = &BTRFS_I(page->mapping->host)->io_tree;
 
@@ -5068,6 +5068,43 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv,
 	}
 }
 
+int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dstv,
+			       unsigned long start,
+			       unsigned long len)
+{
+	size_t cur;
+	size_t offset;
+	struct page *page;
+	char *kaddr;
+	char __user *dst = (char __user *)dstv;
+	size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
+	unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
+	int ret = 0;
+
+	WARN_ON(start > eb->len);
+	WARN_ON(start + len > eb->start + eb->len);
+
+	offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
+
+	while (len > 0) {
+		page = extent_buffer_page(eb, i);
+
+		cur = min(len, (PAGE_CACHE_SIZE - offset));
+		kaddr = page_address(page);
+		if (copy_to_user(dst, kaddr + offset, cur)) {
+			ret = -EFAULT;
+			break;
+		}
+
+		dst += cur;
+		len -= cur;
+		offset = 0;
+		i++;
+	}
+
+	return ret;
+}
+
 int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
 			      unsigned long min_len, char **map,
 			      unsigned long *map_start,
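
The new read_extent_buffer_to_user() mirrors read_extent_buffer() but copies page-by-page with copy_to_user(), so a caller can stream item data to userspace without a kernel bounce buffer. A hedged sketch of the calling pattern (hypothetical names standing in for a handler's own state; the real caller is copy_to_sk() in the fs/btrfs/ioctl.c hunks below):

/*
 * Sketch only, not from this patch: stream one item's bytes from a
 * leaf extent buffer straight to a user buffer. "leaf", "ubuf",
 * "sk_offset", "item_off" and "item_len" are hypothetical locals.
 */
static int copy_item_to_user(struct extent_buffer *leaf, char __user *ubuf,
			     unsigned long *sk_offset,
			     unsigned long item_off, unsigned long item_len)
{
	char __user *up = ubuf + *sk_offset;

	/* returns 0 or -EFAULT; on failure the user buffer is partly written */
	if (read_extent_buffer_to_user(leaf, up, item_off, item_len))
		return -EFAULT;

	*sk_offset += item_len;
	return 0;
}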
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 8b63f2d46518..15ce5f2a2b62 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -304,6 +304,9 @@ int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
 void read_extent_buffer(struct extent_buffer *eb, void *dst,
 			unsigned long start,
 			unsigned long len);
+int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dst,
+			       unsigned long start,
+			       unsigned long len);
 void write_extent_buffer(struct extent_buffer *eb, const void *src,
 			 unsigned long start, unsigned long len);
 void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 82c18ba12e3f..0d321c23069a 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1957,7 +1957,8 @@ static noinline int copy_to_sk(struct btrfs_root *root,
 			       struct btrfs_path *path,
 			       struct btrfs_key *key,
 			       struct btrfs_ioctl_search_key *sk,
-			       char *buf,
+			       size_t *buf_size,
+			       char __user *ubuf,
 			       unsigned long *sk_offset,
 			       int *num_found)
 {
@@ -1989,13 +1990,25 @@ static noinline int copy_to_sk(struct btrfs_root *root,
 		if (!key_in_sk(key, sk))
 			continue;
 
-		if (sizeof(sh) + item_len > BTRFS_SEARCH_ARGS_BUFSIZE)
+		if (sizeof(sh) + item_len > *buf_size) {
+			if (*num_found) {
+				ret = 1;
+				goto out;
+			}
+
+			/*
+			 * return one empty item back for v1, which does not
+			 * handle -EOVERFLOW
+			 */
+
+			*buf_size = sizeof(sh) + item_len;
 			item_len = 0;
+			ret = -EOVERFLOW;
+		}
 
-		if (sizeof(sh) + item_len + *sk_offset >
-		    BTRFS_SEARCH_ARGS_BUFSIZE) {
+		if (sizeof(sh) + item_len + *sk_offset > *buf_size) {
 			ret = 1;
-			goto overflow;
+			goto out;
 		}
 
 		sh.objectid = key->objectid;
@@ -2005,20 +2018,33 @@ static noinline int copy_to_sk(struct btrfs_root *root,
 		sh.transid = found_transid;
 
 		/* copy search result header */
-		memcpy(buf + *sk_offset, &sh, sizeof(sh));
+		if (copy_to_user(ubuf + *sk_offset, &sh, sizeof(sh))) {
+			ret = -EFAULT;
+			goto out;
+		}
+
 		*sk_offset += sizeof(sh);
 
 		if (item_len) {
-			char *p = buf + *sk_offset;
+			char __user *up = ubuf + *sk_offset;
 			/* copy the item */
-			read_extent_buffer(leaf, p,
-					   item_off, item_len);
+			if (read_extent_buffer_to_user(leaf, up,
+						       item_off, item_len)) {
+				ret = -EFAULT;
+				goto out;
+			}
+
 			*sk_offset += item_len;
 		}
 		(*num_found)++;
 
-		if (*num_found >= sk->nr_items)
-			break;
+		if (ret) /* -EOVERFLOW from above */
+			goto out;
+
+		if (*num_found >= sk->nr_items) {
+			ret = 1;
+			goto out;
+		}
 	}
 advance_key:
 	ret = 0;
@@ -2033,22 +2059,37 @@ advance_key:
 			key->objectid++;
 	} else
 		ret = 1;
-overflow:
+out:
+	/*
+	 *  0: all items from this leaf copied, continue with next
+	 *  1: * more items can be copied, but unused buffer is too small
+	 *     * all items were found
+	 *     Either way, it will stop the loop which iterates to the next
+	 *     leaf
+	 *  -EOVERFLOW: item was too large for buffer
+	 *  -EFAULT: could not copy extent buffer back to userspace
+	 */
 	return ret;
 }
 
 static noinline int search_ioctl(struct inode *inode,
-				 struct btrfs_ioctl_search_args *args)
+				 struct btrfs_ioctl_search_key *sk,
+				 size_t *buf_size,
+				 char __user *ubuf)
 {
 	struct btrfs_root *root;
 	struct btrfs_key key;
 	struct btrfs_path *path;
-	struct btrfs_ioctl_search_key *sk = &args->key;
 	struct btrfs_fs_info *info = BTRFS_I(inode)->root->fs_info;
 	int ret;
 	int num_found = 0;
 	unsigned long sk_offset = 0;
 
+	if (*buf_size < sizeof(struct btrfs_ioctl_search_header)) {
+		*buf_size = sizeof(struct btrfs_ioctl_search_header);
+		return -EOVERFLOW;
+	}
+
 	path = btrfs_alloc_path();
 	if (!path)
 		return -ENOMEM;
@@ -2082,14 +2123,15 @@ static noinline int search_ioctl(struct inode *inode,
 			ret = 0;
 			goto err;
 		}
-		ret = copy_to_sk(root, path, &key, sk, args->buf,
+		ret = copy_to_sk(root, path, &key, sk, buf_size, ubuf,
 				 &sk_offset, &num_found);
 		btrfs_release_path(path);
-		if (ret || num_found >= sk->nr_items)
+		if (ret)
 			break;
 
 	}
-	ret = 0;
+	if (ret > 0)
+		ret = 0;
 err:
 	sk->nr_items = num_found;
 	btrfs_free_path(path);
@@ -2099,22 +2141,73 @@ err:
 static noinline int btrfs_ioctl_tree_search(struct file *file,
 					   void __user *argp)
 {
-	struct btrfs_ioctl_search_args *args;
-	struct inode *inode;
-	int ret;
+	struct btrfs_ioctl_search_args __user *uargs;
+	struct btrfs_ioctl_search_key sk;
+	struct inode *inode;
+	int ret;
+	size_t buf_size;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	args = memdup_user(argp, sizeof(*args));
-	if (IS_ERR(args))
-		return PTR_ERR(args);
+	uargs = (struct btrfs_ioctl_search_args __user *)argp;
+
+	if (copy_from_user(&sk, &uargs->key, sizeof(sk)))
+		return -EFAULT;
+
+	buf_size = sizeof(uargs->buf);
 
 	inode = file_inode(file);
-	ret = search_ioctl(inode, args);
-	if (ret == 0 && copy_to_user(argp, args, sizeof(*args)))
+	ret = search_ioctl(inode, &sk, &buf_size, uargs->buf);
+
+	/*
+	 * In the original implementation an overflow is handled by returning
+	 * a search header with a len of zero, so reset ret.
+	 */
+	if (ret == -EOVERFLOW)
+		ret = 0;
+
+	if (ret == 0 && copy_to_user(&uargs->key, &sk, sizeof(sk)))
 		ret = -EFAULT;
-	kfree(args);
+	return ret;
+}
+
+static noinline int btrfs_ioctl_tree_search_v2(struct file *file,
+					       void __user *argp)
+{
+	struct btrfs_ioctl_search_args_v2 __user *uarg;
+	struct btrfs_ioctl_search_args_v2 args;
+	struct inode *inode;
+	int ret;
+	size_t buf_size;
+	const size_t buf_limit = 16 * 1024 * 1024;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	/* copy search header and buffer size */
+	uarg = (struct btrfs_ioctl_search_args_v2 __user *)argp;
+	if (copy_from_user(&args, uarg, sizeof(args)))
+		return -EFAULT;
+
+	buf_size = args.buf_size;
+
+	if (buf_size < sizeof(struct btrfs_ioctl_search_header))
+		return -EOVERFLOW;
+
+	/* limit result size to 16MB */
+	if (buf_size > buf_limit)
+		buf_size = buf_limit;
+
+	inode = file_inode(file);
+	ret = search_ioctl(inode, &args.key, &buf_size,
+			   (char *)(&uarg->buf[0]));
+	if (ret == 0 && copy_to_user(&uarg->key, &args.key, sizeof(args.key)))
+		ret = -EFAULT;
+	else if (ret == -EOVERFLOW &&
+		copy_to_user(&uarg->buf_size, &buf_size, sizeof(buf_size)))
+		ret = -EFAULT;
+
 	return ret;
 }
 
@@ -5198,6 +5291,8 @@ long btrfs_ioctl(struct file *file, unsigned int
 		return btrfs_ioctl_trans_end(file);
 	case BTRFS_IOC_TREE_SEARCH:
 		return btrfs_ioctl_tree_search(file, argp);
+	case BTRFS_IOC_TREE_SEARCH_V2:
+		return btrfs_ioctl_tree_search_v2(file, argp);
 	case BTRFS_IOC_INO_LOOKUP:
 		return btrfs_ioctl_ino_lookup(file, argp);
 	case BTRFS_IOC_INO_PATHS:
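
The v2 ioctl added above drops v1's fixed, page-sized result buffer: the caller chooses the buffer size (capped at 16MB), and on EOVERFLOW the kernel writes back the size the oversized item would need. A userspace sketch (assumes the btrfs_ioctl_search_args_v2 definitions from this series are visible in <linux/btrfs.h>; error handling is minimal):

#include <sys/ioctl.h>
#include <linux/btrfs.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Sketch only: run one BTRFS_IOC_TREE_SEARCH_V2 call with a
 * caller-sized result buffer appended after the fixed header.
 */
static int tree_search_v2(int fd, const struct btrfs_ioctl_search_key *key)
{
	size_t buf_size = 64 * 1024;	/* anything up to the 16MB cap */
	struct btrfs_ioctl_search_args_v2 *args;

	args = malloc(sizeof(*args) + buf_size);
	if (!args)
		return -1;

	memcpy(&args->key, key, sizeof(*key));
	args->buf_size = buf_size;

	if (ioctl(fd, BTRFS_IOC_TREE_SEARCH_V2, args) < 0) {
		/* on EOVERFLOW, args->buf_size now holds the needed size */
		perror("BTRFS_IOC_TREE_SEARCH_V2");
		free(args);
		return -1;
	}
	/* the kernel updates nr_items to the number of items copied */
	printf("%u items returned\n", args->key.nr_items);
	free(args);
	return 0;
}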
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index cf5aead95a7f..98cb6b2630f9 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1798,8 +1798,10 @@ static int qgroup_shared_accounting(struct btrfs_trans_handle *trans,
 		return -ENOMEM;
 
 	tmp = ulist_alloc(GFP_NOFS);
-	if (!tmp)
+	if (!tmp) {
+		ulist_free(qgroups);
 		return -ENOMEM;
+	}
 
 	btrfs_get_tree_mod_seq(fs_info, &elem);
 	ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr, elem.seq,
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index 30947f923620..09230cf3a244 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -428,8 +428,13 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
 			continue;
 		}
 		if (!dev->bdev) {
-			/* cannot read ahead on missing device */
-			continue;
+			/*
+			 * cannot read ahead on missing device, but for RAID5/6,
+			 * REQ_GET_READ_MIRRORS returns 1, so don't skip the
+			 * missing device in that case.
+			 */
+			if (nzones > 1)
+				continue;
 		}
 		if (dev_replace_is_ongoing &&
 		    dev == fs_info->dev_replace.tgtdev) {
diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c
index a5dcacb5df9c..9626252ee6b4 100644
--- a/fs/btrfs/tests/btrfs-tests.c
+++ b/fs/btrfs/tests/btrfs-tests.c
@@ -135,7 +135,7 @@ restart:
 	radix_tree_for_each_slot(slot, &fs_info->buffer_radix, &iter, 0) {
 		struct extent_buffer *eb;
 
-		eb = radix_tree_deref_slot(slot);
+		eb = radix_tree_deref_slot_protected(slot, &fs_info->buffer_lock);
 		if (!eb)
 			continue;
 		/* Shouldn't happen but that kind of thinking creates CVE's */
diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c
index fa691b754aaf..ec3dcb202357 100644
--- a/fs/btrfs/tests/qgroup-tests.c
+++ b/fs/btrfs/tests/qgroup-tests.c
@@ -415,6 +415,8 @@ int btrfs_test_qgroups(void)
 		ret = -ENOMEM;
 		goto out;
 	}
+	btrfs_set_header_level(root->node, 0);
+	btrfs_set_header_nritems(root->node, 0);
 	root->alloc_bytenr += 8192;
 
 	tmp_root = btrfs_alloc_dummy_root();
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 9630f10f8e1e..511839c04f11 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1284,11 +1284,13 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
 		goto fail;
 	}
 
-	pending->error = btrfs_qgroup_inherit(trans, fs_info,
-					      root->root_key.objectid,
-					      objectid, pending->inherit);
-	if (pending->error)
-		goto no_free_objectid;
+	ret = btrfs_qgroup_inherit(trans, fs_info,
+				   root->root_key.objectid,
+				   objectid, pending->inherit);
+	if (ret) {
+		btrfs_abort_transaction(trans, root, ret);
+		goto fail;
+	}
 
 	/* see comments in should_cow_block() */
 	set_bit(BTRFS_ROOT_FORCE_COW, &root->state);
diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c
index 21887d63dad5..469f2e8657e8 100644
--- a/fs/ceph/acl.c
+++ b/fs/ceph/acl.c
@@ -104,12 +104,6 @@ int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type)
 	umode_t new_mode = inode->i_mode, old_mode = inode->i_mode;
 	struct dentry *dentry;
 
-	if (acl) {
-		ret = posix_acl_valid(acl);
-		if (ret < 0)
-			goto out;
-	}
-
 	switch (type) {
 	case ACL_TYPE_ACCESS:
 		name = POSIX_ACL_XATTR_ACCESS;
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 4f3f69079f36..90b3954d48ed 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -211,18 +211,15 @@ static int readpage_nounlock(struct file *filp, struct page *page)
 		SetPageError(page);
 		ceph_fscache_readpage_cancel(inode, page);
 		goto out;
-	} else {
-		if (err < PAGE_CACHE_SIZE) {
-			/* zero fill remainder of page */
-			zero_user_segment(page, err, PAGE_CACHE_SIZE);
-		} else {
-			flush_dcache_page(page);
-		}
 	}
-	SetPageUptodate(page);
+	if (err < PAGE_CACHE_SIZE)
+		/* zero fill remainder of page */
+		zero_user_segment(page, err, PAGE_CACHE_SIZE);
+	else
+		flush_dcache_page(page);
 
-	if (err >= 0)
-		ceph_readpage_to_fscache(inode, page);
+	SetPageUptodate(page);
+	ceph_readpage_to_fscache(inode, page);
 
 out:
 	return err < 0 ? err : 0;
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index c561b628ebce..1fde164b74b5 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -221,8 +221,8 @@ int ceph_unreserve_caps(struct ceph_mds_client *mdsc,
 	return 0;
 }
 
-static struct ceph_cap *get_cap(struct ceph_mds_client *mdsc,
-				struct ceph_cap_reservation *ctx)
+struct ceph_cap *ceph_get_cap(struct ceph_mds_client *mdsc,
+			      struct ceph_cap_reservation *ctx)
 {
 	struct ceph_cap *cap = NULL;
 
@@ -508,15 +508,14 @@ static void __check_cap_issue(struct ceph_inode_info *ci, struct ceph_cap *cap,
  * it is < 0.  (This is so we can atomically add the cap and add an
  * open file reference to it.)
  */
-int ceph_add_cap(struct inode *inode,
+void ceph_add_cap(struct inode *inode,
 		 struct ceph_mds_session *session, u64 cap_id,
 		 int fmode, unsigned issued, unsigned wanted,
 		 unsigned seq, unsigned mseq, u64 realmino, int flags,
-		 struct ceph_cap_reservation *caps_reservation)
+		 struct ceph_cap **new_cap)
 {
 	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
 	struct ceph_inode_info *ci = ceph_inode(inode);
-	struct ceph_cap *new_cap = NULL;
 	struct ceph_cap *cap;
 	int mds = session->s_mds;
 	int actual_wanted;
@@ -531,20 +530,10 @@ int ceph_add_cap(struct inode *inode,
 	if (fmode >= 0)
 		wanted |= ceph_caps_for_mode(fmode);
 
-retry:
-	spin_lock(&ci->i_ceph_lock);
 	cap = __get_cap_for_mds(ci, mds);
 	if (!cap) {
-		if (new_cap) {
-			cap = new_cap;
-			new_cap = NULL;
-		} else {
-			spin_unlock(&ci->i_ceph_lock);
-			new_cap = get_cap(mdsc, caps_reservation);
-			if (new_cap == NULL)
-				return -ENOMEM;
-			goto retry;
-		}
+		cap = *new_cap;
+		*new_cap = NULL;
 
 		cap->issued = 0;
 		cap->implemented = 0;
@@ -562,9 +551,6 @@ retry:
 		session->s_nr_caps++;
 		spin_unlock(&session->s_cap_lock);
 	} else {
-		if (new_cap)
-			ceph_put_cap(mdsc, new_cap);
-
 		/*
 		 * auth mds of the inode changed. we received the cap export
 		 * message, but still haven't received the cap import message.
@@ -626,7 +612,6 @@ retry:
 			ci->i_auth_cap = cap;
 			cap->mds_wanted = wanted;
 		}
-		ci->i_cap_exporting_issued = 0;
 	} else {
 		WARN_ON(ci->i_auth_cap == cap);
 	}
@@ -648,9 +633,6 @@ retry:
 
 	if (fmode >= 0)
 		__ceph_get_fmode(ci, fmode);
-	spin_unlock(&ci->i_ceph_lock);
-	wake_up_all(&ci->i_cap_wq);
-	return 0;
 }
 
 /*
@@ -685,7 +667,7 @@ static int __cap_is_valid(struct ceph_cap *cap)
  */
 int __ceph_caps_issued(struct ceph_inode_info *ci, int *implemented)
 {
-	int have = ci->i_snap_caps | ci->i_cap_exporting_issued;
+	int have = ci->i_snap_caps;
 	struct ceph_cap *cap;
 	struct rb_node *p;
 
@@ -900,7 +882,7 @@ int __ceph_caps_mds_wanted(struct ceph_inode_info *ci)
  */
 static int __ceph_is_any_caps(struct ceph_inode_info *ci)
 {
-	return !RB_EMPTY_ROOT(&ci->i_caps) || ci->i_cap_exporting_issued;
+	return !RB_EMPTY_ROOT(&ci->i_caps);
 }
 
 int ceph_is_any_caps(struct inode *inode)
@@ -2397,32 +2379,30 @@ static void invalidate_aliases(struct inode *inode)
  * actually be a revocation if it specifies a smaller cap set.)
  *
  * caller holds s_mutex and i_ceph_lock, we drop both.
- *
- * return value:
- *  0 - ok
- *  1 - check_caps on auth cap only (writeback)
- *  2 - check_caps (ack revoke)
  */
-static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
+static void handle_cap_grant(struct ceph_mds_client *mdsc,
+			     struct inode *inode, struct ceph_mds_caps *grant,
+			     void *snaptrace, int snaptrace_len,
+			     struct ceph_buffer *xattr_buf,
 			     struct ceph_mds_session *session,
-			     struct ceph_cap *cap,
-			     struct ceph_buffer *xattr_buf)
+			     struct ceph_cap *cap, int issued)
 	__releases(ci->i_ceph_lock)
 {
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	int mds = session->s_mds;
 	int seq = le32_to_cpu(grant->seq);
 	int newcaps = le32_to_cpu(grant->caps);
-	int issued, implemented, used, wanted, dirty;
+	int used, wanted, dirty;
 	u64 size = le64_to_cpu(grant->size);
 	u64 max_size = le64_to_cpu(grant->max_size);
 	struct timespec mtime, atime, ctime;
 	int check_caps = 0;
-	int wake = 0;
-	int writeback = 0;
-	int queue_invalidate = 0;
-	int deleted_inode = 0;
-	int queue_revalidate = 0;
+	bool wake = 0;
+	bool writeback = 0;
+	bool queue_trunc = 0;
+	bool queue_invalidate = 0;
+	bool queue_revalidate = 0;
+	bool deleted_inode = 0;
 
 	dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n",
 	     inode, cap, mds, seq, ceph_cap_string(newcaps));
@@ -2466,16 +2446,13 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
 	}
 
 	/* side effects now are allowed */
-
-	issued = __ceph_caps_issued(ci, &implemented);
-	issued |= implemented | __ceph_caps_dirty(ci);
-
 	cap->cap_gen = session->s_cap_gen;
 	cap->seq = seq;
 
 	__check_cap_issue(ci, cap, newcaps);
 
-	if ((issued & CEPH_CAP_AUTH_EXCL) == 0) {
+	if ((newcaps & CEPH_CAP_AUTH_SHARED) &&
+	    (issued & CEPH_CAP_AUTH_EXCL) == 0) {
 		inode->i_mode = le32_to_cpu(grant->mode);
 		inode->i_uid = make_kuid(&init_user_ns, le32_to_cpu(grant->uid));
 		inode->i_gid = make_kgid(&init_user_ns, le32_to_cpu(grant->gid));
@@ -2484,7 +2461,8 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
 		     from_kgid(&init_user_ns, inode->i_gid));
 	}
 
-	if ((issued & CEPH_CAP_LINK_EXCL) == 0) {
+	if ((newcaps & CEPH_CAP_AUTH_SHARED) &&
+	    (issued & CEPH_CAP_LINK_EXCL) == 0) {
 		set_nlink(inode, le32_to_cpu(grant->nlink));
 		if (inode->i_nlink == 0 &&
 		    (newcaps & (CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL)))
@@ -2511,30 +2489,35 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
 	if ((issued & CEPH_CAP_FILE_CACHE) && ci->i_rdcache_gen > 1)
 		queue_revalidate = 1;
 
-	/* size/ctime/mtime/atime? */
-	ceph_fill_file_size(inode, issued,
-			    le32_to_cpu(grant->truncate_seq),
-			    le64_to_cpu(grant->truncate_size), size);
-	ceph_decode_timespec(&mtime, &grant->mtime);
-	ceph_decode_timespec(&atime, &grant->atime);
-	ceph_decode_timespec(&ctime, &grant->ctime);
-	ceph_fill_file_time(inode, issued,
-			    le32_to_cpu(grant->time_warp_seq), &ctime, &mtime,
-			    &atime);
-
-
-	/* file layout may have changed */
-	ci->i_layout = grant->layout;
-
-	/* max size increase? */
-	if (ci->i_auth_cap == cap && max_size != ci->i_max_size) {
-		dout("max_size %lld -> %llu\n", ci->i_max_size, max_size);
-		ci->i_max_size = max_size;
-		if (max_size >= ci->i_wanted_max_size) {
-			ci->i_wanted_max_size = 0;	/* reset */
-			ci->i_requested_max_size = 0;
+	if (newcaps & CEPH_CAP_ANY_RD) {
+		/* ctime/mtime/atime? */
+		ceph_decode_timespec(&mtime, &grant->mtime);
+		ceph_decode_timespec(&atime, &grant->atime);
+		ceph_decode_timespec(&ctime, &grant->ctime);
+		ceph_fill_file_time(inode, issued,
+				    le32_to_cpu(grant->time_warp_seq),
+				    &ctime, &mtime, &atime);
+	}
+
+	if (newcaps & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR)) {
+		/* file layout may have changed */
+		ci->i_layout = grant->layout;
+		/* size/truncate_seq? */
+		queue_trunc = ceph_fill_file_size(inode, issued,
+					le32_to_cpu(grant->truncate_seq),
+					le64_to_cpu(grant->truncate_size),
+					size);
+		/* max size increase? */
+		if (ci->i_auth_cap == cap && max_size != ci->i_max_size) {
+			dout("max_size %lld -> %llu\n",
+			     ci->i_max_size, max_size);
+			ci->i_max_size = max_size;
+			if (max_size >= ci->i_wanted_max_size) {
+				ci->i_wanted_max_size = 0;	/* reset */
+				ci->i_requested_max_size = 0;
+			}
+			wake = 1;
 		}
-		wake = 1;
 	}
 
 	/* check cap bits */
@@ -2595,6 +2578,23 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
 
 	spin_unlock(&ci->i_ceph_lock);
 
+	if (le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT) {
+		down_write(&mdsc->snap_rwsem);
+		ceph_update_snap_trace(mdsc, snaptrace,
+				       snaptrace + snaptrace_len, false);
+		downgrade_write(&mdsc->snap_rwsem);
+		kick_flushing_inode_caps(mdsc, session, inode);
+		up_read(&mdsc->snap_rwsem);
+		if (newcaps & ~issued)
+			wake = 1;
+	}
+
+	if (queue_trunc) {
+		ceph_queue_vmtruncate(inode);
+		ceph_queue_revalidate(inode);
+	} else if (queue_revalidate)
+		ceph_queue_revalidate(inode);
+
 	if (writeback)
 		/*
 		 * queue inode for writeback: we can't actually call
@@ -2606,8 +2606,6 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
 		ceph_queue_invalidate(inode);
 	if (deleted_inode)
 		invalidate_aliases(inode);
-	if (queue_revalidate)
-		ceph_queue_revalidate(inode);
 	if (wake)
 		wake_up_all(&ci->i_cap_wq);
 
@@ -2784,7 +2782,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
 {
 	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
 	struct ceph_mds_session *tsession = NULL;
-	struct ceph_cap *cap, *tcap;
+	struct ceph_cap *cap, *tcap, *new_cap = NULL;
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	u64 t_cap_id;
 	unsigned mseq = le32_to_cpu(ex->migrate_seq);
@@ -2807,7 +2805,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
 retry:
 	spin_lock(&ci->i_ceph_lock);
 	cap = __get_cap_for_mds(ci, mds);
-	if (!cap)
+	if (!cap || cap->cap_id != le64_to_cpu(ex->cap_id))
 		goto out_unlock;
 
 	if (target < 0) {
@@ -2846,15 +2844,14 @@ retry:
 		}
 		__ceph_remove_cap(cap, false);
 		goto out_unlock;
-	}
-
-	if (tsession) {
-		int flag = (cap == ci->i_auth_cap) ? CEPH_CAP_FLAG_AUTH : 0;
-		spin_unlock(&ci->i_ceph_lock);
+	} else if (tsession) {
 		/* add placeholder for the export tagert */
+		int flag = (cap == ci->i_auth_cap) ? CEPH_CAP_FLAG_AUTH : 0;
 		ceph_add_cap(inode, tsession, t_cap_id, -1, issued, 0,
-			     t_seq - 1, t_mseq, (u64)-1, flag, NULL);
-		goto retry;
+			     t_seq - 1, t_mseq, (u64)-1, flag, &new_cap);
+
+		__ceph_remove_cap(cap, false);
+		goto out_unlock;
 	}
 
 	spin_unlock(&ci->i_ceph_lock);
@@ -2873,6 +2870,7 @@ retry:
 					  SINGLE_DEPTH_NESTING);
 		}
 		ceph_add_cap_releases(mdsc, tsession);
+		new_cap = ceph_get_cap(mdsc, NULL);
 	} else {
 		WARN_ON(1);
 		tsession = NULL;
@@ -2887,24 +2885,27 @@ out_unlock:
 		mutex_unlock(&tsession->s_mutex);
 		ceph_put_mds_session(tsession);
 	}
+	if (new_cap)
+		ceph_put_cap(mdsc, new_cap);
 }
 
 /*
- * Handle cap IMPORT.  If there are temp bits from an older EXPORT,
- * clean them up.
+ * Handle cap IMPORT.
  *
- * caller holds s_mutex.
+ * caller holds s_mutex. acquires i_ceph_lock
 */
 static void handle_cap_import(struct ceph_mds_client *mdsc,
			      struct inode *inode, struct ceph_mds_caps *im,
			      struct ceph_mds_cap_peer *ph,
			      struct ceph_mds_session *session,
-			      void *snaptrace, int snaptrace_len)
+			      struct ceph_cap **target_cap, int *old_issued)
+	__acquires(ci->i_ceph_lock)
 {
 	struct ceph_inode_info *ci = ceph_inode(inode);
-	struct ceph_cap *cap;
+	struct ceph_cap *cap, *ocap, *new_cap = NULL;
 	int mds = session->s_mds;
-	unsigned issued = le32_to_cpu(im->caps);
+	int issued;
+	unsigned caps = le32_to_cpu(im->caps);
 	unsigned wanted = le32_to_cpu(im->wanted);
 	unsigned seq = le32_to_cpu(im->seq);
 	unsigned mseq = le32_to_cpu(im->migrate_seq);
@@ -2924,40 +2925,52 @@ static void handle_cap_import(struct ceph_mds_client *mdsc,
 	dout("handle_cap_import inode %p ci %p mds%d mseq %d peer %d\n",
 	     inode, ci, mds, mseq, peer);
 
+retry:
 	spin_lock(&ci->i_ceph_lock);
-	cap = peer >= 0 ? __get_cap_for_mds(ci, peer) : NULL;
-	if (cap && cap->cap_id == p_cap_id) {
+	cap = __get_cap_for_mds(ci, mds);
+	if (!cap) {
+		if (!new_cap) {
+			spin_unlock(&ci->i_ceph_lock);
+			new_cap = ceph_get_cap(mdsc, NULL);
+			goto retry;
+		}
+		cap = new_cap;
+	} else {
+		if (new_cap) {
+			ceph_put_cap(mdsc, new_cap);
+			new_cap = NULL;
+		}
+	}
+
+	__ceph_caps_issued(ci, &issued);
+	issued |= __ceph_caps_dirty(ci);
+
+	ceph_add_cap(inode, session, cap_id, -1, caps, wanted, seq, mseq,
+		     realmino, CEPH_CAP_FLAG_AUTH, &new_cap);
+
+	ocap = peer >= 0 ? __get_cap_for_mds(ci, peer) : NULL;
+	if (ocap && ocap->cap_id == p_cap_id) {
 		dout(" remove export cap %p mds%d flags %d\n",
-		     cap, peer, ph->flags);
+		     ocap, peer, ph->flags);
 		if ((ph->flags & CEPH_CAP_FLAG_AUTH) &&
-		    (cap->seq != le32_to_cpu(ph->seq) ||
-		     cap->mseq != le32_to_cpu(ph->mseq))) {
+		    (ocap->seq != le32_to_cpu(ph->seq) ||
+		     ocap->mseq != le32_to_cpu(ph->mseq))) {
 			pr_err("handle_cap_import: mismatched seq/mseq: "
 			       "ino (%llx.%llx) mds%d seq %d mseq %d "
 			       "importer mds%d has peer seq %d mseq %d\n",
-			       ceph_vinop(inode), peer, cap->seq,
-			       cap->mseq, mds, le32_to_cpu(ph->seq),
+			       ceph_vinop(inode), peer, ocap->seq,
+			       ocap->mseq, mds, le32_to_cpu(ph->seq),
 			       le32_to_cpu(ph->mseq));
 		}
-		ci->i_cap_exporting_issued = cap->issued;
-		__ceph_remove_cap(cap, (ph->flags & CEPH_CAP_FLAG_RELEASE));
+		__ceph_remove_cap(ocap, (ph->flags & CEPH_CAP_FLAG_RELEASE));
 	}
 
 	/* make sure we re-request max_size, if necessary */
 	ci->i_wanted_max_size = 0;
 	ci->i_requested_max_size = 0;
-	spin_unlock(&ci->i_ceph_lock);
-
-	down_write(&mdsc->snap_rwsem);
-	ceph_update_snap_trace(mdsc, snaptrace, snaptrace+snaptrace_len,
-			       false);
-	downgrade_write(&mdsc->snap_rwsem);
-	ceph_add_cap(inode, session, cap_id, -1,
-		     issued, wanted, seq, mseq, realmino, CEPH_CAP_FLAG_AUTH,
-		     NULL /* no caps context */);
-	kick_flushing_inode_caps(mdsc, session, inode);
-	up_read(&mdsc->snap_rwsem);
 
+	*old_issued = issued;
+	*target_cap = cap;
 }
 
 /*
@@ -2977,7 +2990,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
 	struct ceph_mds_caps *h;
 	struct ceph_mds_cap_peer *peer = NULL;
 	int mds = session->s_mds;
-	int op;
+	int op, issued;
 	u32 seq, mseq;
 	struct ceph_vino vino;
 	u64 cap_id;
@@ -3069,7 +3082,10 @@ void ceph_handle_caps(struct ceph_mds_session *session,
 
 	case CEPH_CAP_OP_IMPORT:
 		handle_cap_import(mdsc, inode, h, peer, session,
-				  snaptrace, snaptrace_len);
+				  &cap, &issued);
+		handle_cap_grant(mdsc, inode, h, snaptrace, snaptrace_len,
+				 msg->middle, session, cap, issued);
+		goto done_unlocked;
 	}
 
 	/* the rest require a cap */
@@ -3086,8 +3102,10 @@ void ceph_handle_caps(struct ceph_mds_session *session,
 	switch (op) {
 	case CEPH_CAP_OP_REVOKE:
 	case CEPH_CAP_OP_GRANT:
-	case CEPH_CAP_OP_IMPORT:
-		handle_cap_grant(inode, h, session, cap, msg->middle);
+		__ceph_caps_issued(ci, &issued);
+		issued |= __ceph_caps_dirty(ci);
+		handle_cap_grant(mdsc, inode, h, NULL, 0, msg->middle,
+				 session, cap, issued);
 		goto done_unlocked;
 
 	case CEPH_CAP_OP_FLUSH_ACK:
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index 00d6af6a32ec..8d7d782f4382 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -169,7 +169,7 @@ static struct dentry *__get_parent(struct super_block *sb,
 	return dentry;
 }
 
-struct dentry *ceph_get_parent(struct dentry *child)
+static struct dentry *ceph_get_parent(struct dentry *child)
 {
 	/* don't re-export snaps */
 	if (ceph_snap(child->d_inode) != CEPH_NOSNAP)
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index e4fff9ff1c27..04c89c266cec 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -10,6 +10,7 @@
 #include <linux/writeback.h>
 #include <linux/vmalloc.h>
 #include <linux/posix_acl.h>
+#include <linux/random.h>
 
 #include "super.h"
 #include "mds_client.h"
@@ -179,9 +180,8 @@ struct ceph_inode_frag *__ceph_find_frag(struct ceph_inode_info *ci, u32 f)
  * specified, copy the frag delegation info to the caller if
 * it is present.
 */
-u32 ceph_choose_frag(struct ceph_inode_info *ci, u32 v,
-		     struct ceph_inode_frag *pfrag,
-		     int *found)
+static u32 __ceph_choose_frag(struct ceph_inode_info *ci, u32 v,
+			      struct ceph_inode_frag *pfrag, int *found)
 {
 	u32 t = ceph_frag_make(0, 0);
 	struct ceph_inode_frag *frag;
@@ -191,7 +191,6 @@ u32 ceph_choose_frag(struct ceph_inode_info *ci, u32 v,
 	if (found)
 		*found = 0;
 
-	mutex_lock(&ci->i_fragtree_mutex);
 	while (1) {
 		WARN_ON(!ceph_frag_contains_value(t, v));
 		frag = __ceph_find_frag(ci, t);
@@ -220,10 +219,19 @@ u32 ceph_choose_frag(struct ceph_inode_info *ci, u32 v,
 	}
 	dout("choose_frag(%x) = %x\n", v, t);
 
-	mutex_unlock(&ci->i_fragtree_mutex);
 	return t;
 }
 
+u32 ceph_choose_frag(struct ceph_inode_info *ci, u32 v,
+		     struct ceph_inode_frag *pfrag, int *found)
+{
+	u32 ret;
+	mutex_lock(&ci->i_fragtree_mutex);
+	ret = __ceph_choose_frag(ci, v, pfrag, found);
+	mutex_unlock(&ci->i_fragtree_mutex);
+	return ret;
+}
+
 /*
  * Process dirfrag (delegation) info from the mds.  Include leaf
  * fragment in tree ONLY if ndist > 0.  Otherwise, only
@@ -237,11 +245,17 @@ static int ceph_fill_dirfrag(struct inode *inode,
 	u32 id = le32_to_cpu(dirinfo->frag);
 	int mds = le32_to_cpu(dirinfo->auth);
 	int ndist = le32_to_cpu(dirinfo->ndist);
+	int diri_auth = -1;
 	int i;
 	int err = 0;
 
+	spin_lock(&ci->i_ceph_lock);
+	if (ci->i_auth_cap)
+		diri_auth = ci->i_auth_cap->mds;
+	spin_unlock(&ci->i_ceph_lock);
+
 	mutex_lock(&ci->i_fragtree_mutex);
-	if (ndist == 0) {
+	if (ndist == 0 && mds == diri_auth) {
 		/* no delegation info needed. */
 		frag = __ceph_find_frag(ci, id);
 		if (!frag)
@@ -286,6 +300,75 @@ out:
 	return err;
 }
 
+static int ceph_fill_fragtree(struct inode *inode,
+			      struct ceph_frag_tree_head *fragtree,
+			      struct ceph_mds_reply_dirfrag *dirinfo)
+{
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	struct ceph_inode_frag *frag;
+	struct rb_node *rb_node;
+	int i;
+	u32 id, nsplits;
+	bool update = false;
+
+	mutex_lock(&ci->i_fragtree_mutex);
+	nsplits = le32_to_cpu(fragtree->nsplits);
+	if (nsplits) {
+		i = prandom_u32() % nsplits;
+		id = le32_to_cpu(fragtree->splits[i].frag);
+		if (!__ceph_find_frag(ci, id))
+			update = true;
+	} else if (!RB_EMPTY_ROOT(&ci->i_fragtree)) {
+		rb_node = rb_first(&ci->i_fragtree);
+		frag = rb_entry(rb_node, struct ceph_inode_frag, node);
+		if (frag->frag != ceph_frag_make(0, 0) || rb_next(rb_node))
+			update = true;
+	}
+	if (!update && dirinfo) {
+		id = le32_to_cpu(dirinfo->frag);
+		if (id != __ceph_choose_frag(ci, id, NULL, NULL))
+			update = true;
+	}
+	if (!update)
+		goto out_unlock;
+
+	dout("fill_fragtree %llx.%llx\n", ceph_vinop(inode));
+	rb_node = rb_first(&ci->i_fragtree);
+	for (i = 0; i < nsplits; i++) {
+		id = le32_to_cpu(fragtree->splits[i].frag);
+		frag = NULL;
+		while (rb_node) {
+			frag = rb_entry(rb_node, struct ceph_inode_frag, node);
+			if (ceph_frag_compare(frag->frag, id) >= 0) {
+				if (frag->frag != id)
+					frag = NULL;
+				else
+					rb_node = rb_next(rb_node);
+				break;
+			}
+			rb_node = rb_next(rb_node);
+			rb_erase(&frag->node, &ci->i_fragtree);
+			kfree(frag);
+			frag = NULL;
+		}
+		if (!frag) {
+			frag = __get_or_create_frag(ci, id);
+			if (IS_ERR(frag))
+				continue;
+		}
+		frag->split_by = le32_to_cpu(fragtree->splits[i].by);
+		dout(" frag %x split by %d\n", frag->frag, frag->split_by);
+	}
+	while (rb_node) {
+		frag = rb_entry(rb_node, struct ceph_inode_frag, node);
+		rb_node = rb_next(rb_node);
+		rb_erase(&frag->node, &ci->i_fragtree);
+		kfree(frag);
+	}
+out_unlock:
+	mutex_unlock(&ci->i_fragtree_mutex);
+	return 0;
+}
 
 /*
  * initialize a newly allocated inode.
@@ -341,7 +424,6 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
 	INIT_LIST_HEAD(&ci->i_cap_snaps);
 	ci->i_head_snapc = NULL;
 	ci->i_snap_caps = 0;
-	ci->i_cap_exporting_issued = 0;
 
 	for (i = 0; i < CEPH_FILE_MODE_NUM; i++)
 		ci->i_nr_by_mode[i] = 0;
@@ -407,7 +489,7 @@ void ceph_destroy_inode(struct inode *inode)
 
 	/*
 	 * we may still have a snap_realm reference if there are stray
-	 * caps in i_cap_exporting_issued or i_snap_caps.
+	 * caps in i_snap_caps.
 	 */
 	if (ci->i_snap_realm) {
 		struct ceph_mds_client *mdsc =
@@ -582,22 +664,26 @@ static int fill_inode(struct inode *inode,
 		       unsigned long ttl_from, int cap_fmode,
 		       struct ceph_cap_reservation *caps_reservation)
 {
+	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
 	struct ceph_mds_reply_inode *info = iinfo->in;
 	struct ceph_inode_info *ci = ceph_inode(inode);
-	int i;
-	int issued = 0, implemented;
+	int issued = 0, implemented, new_issued;
 	struct timespec mtime, atime, ctime;
-	u32 nsplits;
-	struct ceph_inode_frag *frag;
-	struct rb_node *rb_node;
 	struct ceph_buffer *xattr_blob = NULL;
+	struct ceph_cap *new_cap = NULL;
 	int err = 0;
-	int queue_trunc = 0;
+	bool wake = false;
+	bool queue_trunc = false;
+	bool new_version = false;
 
 	dout("fill_inode %p ino %llx.%llx v %llu had %llu\n",
 	     inode, ceph_vinop(inode), le64_to_cpu(info->version),
 	     ci->i_version);
 
+	/* prealloc new cap struct */
+	if (info->cap.caps && ceph_snap(inode) == CEPH_NOSNAP)
+		new_cap = ceph_get_cap(mdsc, caps_reservation);
+
 	/*
 	 * prealloc xattr data, if it looks like we'll need it.  only
 	 * if len > 4 (meaning there are actually xattrs; the first 4
@@ -623,19 +709,23 @@ static int fill_inode(struct inode *inode,
  *   3       2     skip
  *   3       3     update
 */
-	if (le64_to_cpu(info->version) > 0 &&
-	    (ci->i_version & ~1) >= le64_to_cpu(info->version))
-		goto no_change;
-
+	if (ci->i_version == 0 ||
+	    ((info->cap.flags & CEPH_CAP_FLAG_AUTH) &&
+	     le64_to_cpu(info->version) > (ci->i_version & ~1)))
+		new_version = true;
+
 	issued = __ceph_caps_issued(ci, &implemented);
 	issued |= implemented | __ceph_caps_dirty(ci);
+	new_issued = ~issued & le32_to_cpu(info->cap.caps);
 
 	/* update inode */
 	ci->i_version = le64_to_cpu(info->version);
 	inode->i_version++;
 	inode->i_rdev = le32_to_cpu(info->rdev);
+	inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1;
 
-	if ((issued & CEPH_CAP_AUTH_EXCL) == 0) {
+	if ((new_version || (new_issued & CEPH_CAP_AUTH_SHARED)) &&
+	    (issued & CEPH_CAP_AUTH_EXCL) == 0) {
 		inode->i_mode = le32_to_cpu(info->mode);
 		inode->i_uid = make_kuid(&init_user_ns, le32_to_cpu(info->uid));
 		inode->i_gid = make_kgid(&init_user_ns, le32_to_cpu(info->gid));
@@ -644,23 +734,35 @@ static int fill_inode(struct inode *inode,
 			   from_kgid(&init_user_ns, inode->i_gid));
 	}
 
-	if ((issued & CEPH_CAP_LINK_EXCL) == 0)
+	if ((new_version || (new_issued & CEPH_CAP_LINK_SHARED)) &&
+	    (issued & CEPH_CAP_LINK_EXCL) == 0)
 		set_nlink(inode, le32_to_cpu(info->nlink));
 
-	/* be careful with mtime, atime, size */
-	ceph_decode_timespec(&atime, &info->atime);
-	ceph_decode_timespec(&mtime, &info->mtime);
-	ceph_decode_timespec(&ctime, &info->ctime);
-	queue_trunc = ceph_fill_file_size(inode, issued,
-					  le32_to_cpu(info->truncate_seq),
-					  le64_to_cpu(info->truncate_size),
-					  le64_to_cpu(info->size));
-	ceph_fill_file_time(inode, issued,
-			    le32_to_cpu(info->time_warp_seq),
-			    &ctime, &mtime, &atime);
-
-	ci->i_layout = info->layout;
-	inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1;
+	if (new_version || (new_issued & CEPH_CAP_ANY_RD)) {
+		/* be careful with mtime, atime, size */
+		ceph_decode_timespec(&atime, &info->atime);
+		ceph_decode_timespec(&mtime, &info->mtime);
+		ceph_decode_timespec(&ctime, &info->ctime);
+		ceph_fill_file_time(inode, issued,
+				    le32_to_cpu(info->time_warp_seq),
+				    &ctime, &mtime, &atime);
+	}
+
+	if (new_version ||
+	    (new_issued & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR))) {
+		ci->i_layout = info->layout;
+		queue_trunc = ceph_fill_file_size(inode, issued,
+					le32_to_cpu(info->truncate_seq),
+					le64_to_cpu(info->truncate_size),
+					le64_to_cpu(info->size));
+		/* only update max_size on auth cap */
+		if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) &&
+		    ci->i_max_size != le64_to_cpu(info->max_size)) {
+			dout("max_size %lld -> %llu\n", ci->i_max_size,
+			     le64_to_cpu(info->max_size));
+			ci->i_max_size = le64_to_cpu(info->max_size);
+		}
+	}
 
 	/* xattrs */
 	/* note that if i_xattrs.len <= 4, i_xattrs.data will still be NULL. */
@@ -745,58 +847,6 @@ static int fill_inode(struct inode *inode,
745 dout(" marking %p complete (empty)\n", inode); 847 dout(" marking %p complete (empty)\n", inode);
746 __ceph_dir_set_complete(ci, atomic_read(&ci->i_release_count)); 848 __ceph_dir_set_complete(ci, atomic_read(&ci->i_release_count));
747 } 849 }
748no_change:
749 /* only update max_size on auth cap */
750 if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) &&
751 ci->i_max_size != le64_to_cpu(info->max_size)) {
752 dout("max_size %lld -> %llu\n", ci->i_max_size,
753 le64_to_cpu(info->max_size));
754 ci->i_max_size = le64_to_cpu(info->max_size);
755 }
756
757 spin_unlock(&ci->i_ceph_lock);
758
759 /* queue truncate if we saw i_size decrease */
760 if (queue_trunc)
761 ceph_queue_vmtruncate(inode);
762
763 /* populate frag tree */
764 /* FIXME: move me up, if/when version reflects fragtree changes */
765 nsplits = le32_to_cpu(info->fragtree.nsplits);
766 mutex_lock(&ci->i_fragtree_mutex);
767 rb_node = rb_first(&ci->i_fragtree);
768 for (i = 0; i < nsplits; i++) {
769 u32 id = le32_to_cpu(info->fragtree.splits[i].frag);
770 frag = NULL;
771 while (rb_node) {
772 frag = rb_entry(rb_node, struct ceph_inode_frag, node);
773 if (ceph_frag_compare(frag->frag, id) >= 0) {
774 if (frag->frag != id)
775 frag = NULL;
776 else
777 rb_node = rb_next(rb_node);
778 break;
779 }
780 rb_node = rb_next(rb_node);
781 rb_erase(&frag->node, &ci->i_fragtree);
782 kfree(frag);
783 frag = NULL;
784 }
785 if (!frag) {
786 frag = __get_or_create_frag(ci, id);
787 if (IS_ERR(frag))
788 continue;
789 }
790 frag->split_by = le32_to_cpu(info->fragtree.splits[i].by);
791 dout(" frag %x split by %d\n", frag->frag, frag->split_by);
792 }
793 while (rb_node) {
794 frag = rb_entry(rb_node, struct ceph_inode_frag, node);
795 rb_node = rb_next(rb_node);
796 rb_erase(&frag->node, &ci->i_fragtree);
797 kfree(frag);
798 }
799 mutex_unlock(&ci->i_fragtree_mutex);
800 850
801 /* were we issued a capability? */ 851 /* were we issued a capability? */
802 if (info->cap.caps) { 852 if (info->cap.caps) {
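[editor's note] The 58 lines deleted in this hunk are the in-line fragtree merge, which moves into ceph_fill_fragtree() (called later in fill_inode, outside i_ceph_lock). The algorithm is a single-pass merge of two sorted sequences, the MDS's split list and the rbtree, one cursor each: rbtree entries smaller than the next wanted frag are stale and freed, matches are updated in place, misses are created, and anything left after the last wanted frag is purged. A userspace analogue of that merge on a sorted singly linked list, assuming want[] is sorted ascending (all names illustrative):

	#include <stdlib.h>

	struct node { int key; struct node *next; };

	/* make the sorted list match want[]: drop entries not named
	 * there, insert the missing ones, keep the rest */
	static struct node *merge_sorted(struct node *head,
					 const int *want, int n)
	{
		struct node **p = &head;
		int i;

		for (i = 0; i < n; i++) {
			while (*p && (*p)->key < want[i]) {	/* stale */
				struct node *dead = *p;
				*p = dead->next;
				free(dead);
			}
			if (*p && (*p)->key == want[i]) {	/* match */
				p = &(*p)->next;
				continue;
			}
			struct node *nn = malloc(sizeof(*nn));	/* miss */
			if (!nn)
				break;		/* error handling elided */
			nn->key = want[i];
			nn->next = *p;
			*p = nn;
			p = &nn->next;
		}
		while (*p) {			/* trailing stale entries */
			struct node *dead = *p;
			*p = dead->next;
			free(dead);
		}
		return head;
	}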
@@ -809,30 +859,41 @@ no_change:
809 le32_to_cpu(info->cap.seq), 859 le32_to_cpu(info->cap.seq),
810 le32_to_cpu(info->cap.mseq), 860 le32_to_cpu(info->cap.mseq),
811 le64_to_cpu(info->cap.realm), 861 le64_to_cpu(info->cap.realm),
812 info->cap.flags, 862 info->cap.flags, &new_cap);
813 caps_reservation); 863 wake = true;
814 } else { 864 } else {
815 spin_lock(&ci->i_ceph_lock);
816 dout(" %p got snap_caps %s\n", inode, 865 dout(" %p got snap_caps %s\n", inode,
817 ceph_cap_string(le32_to_cpu(info->cap.caps))); 866 ceph_cap_string(le32_to_cpu(info->cap.caps)));
818 ci->i_snap_caps |= le32_to_cpu(info->cap.caps); 867 ci->i_snap_caps |= le32_to_cpu(info->cap.caps);
819 if (cap_fmode >= 0) 868 if (cap_fmode >= 0)
820 __ceph_get_fmode(ci, cap_fmode); 869 __ceph_get_fmode(ci, cap_fmode);
821 spin_unlock(&ci->i_ceph_lock);
822 } 870 }
823 } else if (cap_fmode >= 0) { 871 } else if (cap_fmode >= 0) {
824 pr_warn("mds issued no caps on %llx.%llx\n", 872 pr_warn("mds issued no caps on %llx.%llx\n",
825 ceph_vinop(inode)); 873 ceph_vinop(inode));
826 __ceph_get_fmode(ci, cap_fmode); 874 __ceph_get_fmode(ci, cap_fmode);
827 } 875 }
876 spin_unlock(&ci->i_ceph_lock);
877
878 if (wake)
879 wake_up_all(&ci->i_cap_wq);
880
881 /* queue truncate if we saw i_size decrease */
882 if (queue_trunc)
883 ceph_queue_vmtruncate(inode);
884
885 /* populate frag tree */
886 if (S_ISDIR(inode->i_mode))
887 ceph_fill_fragtree(inode, &info->fragtree, dirinfo);
828 888
829 /* update delegation info? */ 889 /* update delegation info? */
830 if (dirinfo) 890 if (dirinfo)
831 ceph_fill_dirfrag(inode, dirinfo); 891 ceph_fill_dirfrag(inode, dirinfo);
832 892
833 err = 0; 893 err = 0;
834
835out: 894out:
895 if (new_cap)
896 ceph_put_cap(mdsc, new_cap);
836 if (xattr_blob) 897 if (xattr_blob)
837 ceph_buffer_put(xattr_blob); 898 ceph_buffer_put(xattr_blob);
838 return err; 899 return err;
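[editor's note] The new_cap/ceph_put_cap pair added at out: is the caller-side half of the ceph_add_cap() signature change (see the super.h hunk below). The cap-and-inode update now runs entirely under the i_ceph_lock spinlock, where sleeping allocations are forbidden, so the caller preallocates the struct ceph_cap before locking and returns the unused one afterwards. A hedged sketch of the pattern (argument names abbreviated):

	struct ceph_cap *new_cap = NULL;

	if (info->cap.caps)		/* may need a cap record */
		new_cap = ceph_get_cap(mdsc, caps_reservation);	/* can sleep */

	spin_lock(&ci->i_ceph_lock);
	/* ... metadata updates ... */
	ceph_add_cap(inode, session, cap_id, cap_fmode, issued, wanted,
		     cap, seq, realmino, info->cap.flags, &new_cap);
	/* ceph_add_cap() consumes *new_cap only if no cap existed yet */
	spin_unlock(&ci->i_ceph_lock);

	if (new_cap)			/* left over: give it back */
		ceph_put_cap(mdsc, new_cap);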
@@ -1485,7 +1546,7 @@ static void ceph_invalidate_work(struct work_struct *work)
1485 orig_gen = ci->i_rdcache_gen; 1546 orig_gen = ci->i_rdcache_gen;
1486 spin_unlock(&ci->i_ceph_lock); 1547 spin_unlock(&ci->i_ceph_lock);
1487 1548
1488 truncate_inode_pages(inode->i_mapping, 0); 1549 truncate_pagecache(inode, 0);
1489 1550
1490 spin_lock(&ci->i_ceph_lock); 1551 spin_lock(&ci->i_ceph_lock);
1491 if (orig_gen == ci->i_rdcache_gen && 1552 if (orig_gen == ci->i_rdcache_gen &&
@@ -1588,7 +1649,7 @@ retry:
1588 ci->i_truncate_pending, to); 1649 ci->i_truncate_pending, to);
1589 spin_unlock(&ci->i_ceph_lock); 1650 spin_unlock(&ci->i_ceph_lock);
1590 1651
1591 truncate_inode_pages(inode->i_mapping, to); 1652 truncate_pagecache(inode, to);
1592 1653
1593 spin_lock(&ci->i_ceph_lock); 1654 spin_lock(&ci->i_ceph_lock);
1594 if (to == ci->i_truncate_size) { 1655 if (to == ci->i_truncate_size) {
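[editor's note] Both truncate sites switch from truncate_inode_pages() to truncate_pagecache(). The difference matters for mmap: truncate_inode_pages() only drops page-cache pages, so an existing user mapping could keep stale pages reachable past the truncate, while truncate_pagecache() also unmaps the affected range. For reference, the helper is roughly this (paraphrased from mm/truncate.c of this era; the second unmap closes the race with faults that repopulate ptes during the purge):

	void truncate_pagecache(struct inode *inode, loff_t newsize)
	{
		struct address_space *mapping = inode->i_mapping;
		loff_t holebegin = round_up(newsize, PAGE_SIZE);

		unmap_mapping_range(mapping, holebegin, 0, 1);	/* kill ptes */
		truncate_inode_pages(mapping, newsize);		/* drop pages */
		unmap_mapping_range(mapping, holebegin, 0, 1);	/* re-kill racers */
	}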
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 9a33b98cb000..92a2548278fc 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1558,6 +1558,8 @@ ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode)
1558 init_completion(&req->r_safe_completion); 1558 init_completion(&req->r_safe_completion);
1559 INIT_LIST_HEAD(&req->r_unsafe_item); 1559 INIT_LIST_HEAD(&req->r_unsafe_item);
1560 1560
1561 req->r_stamp = CURRENT_TIME;
1562
1561 req->r_op = op; 1563 req->r_op = op;
1562 req->r_direct_mode = mode; 1564 req->r_direct_mode = mode;
1563 return req; 1565 return req;
@@ -1783,7 +1785,8 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
1783 } 1785 }
1784 1786
1785 len = sizeof(*head) + 1787 len = sizeof(*head) +
1786 pathlen1 + pathlen2 + 2*(1 + sizeof(u32) + sizeof(u64)); 1788 pathlen1 + pathlen2 + 2*(1 + sizeof(u32) + sizeof(u64)) +
1789 sizeof(struct timespec);
1787 1790
1788 /* calculate (max) length for cap releases */ 1791 /* calculate (max) length for cap releases */
1789 len += sizeof(struct ceph_mds_request_release) * 1792 len += sizeof(struct ceph_mds_request_release) *
@@ -1800,6 +1803,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
1800 goto out_free2; 1803 goto out_free2;
1801 } 1804 }
1802 1805
1806 msg->hdr.version = 2;
1803 msg->hdr.tid = cpu_to_le64(req->r_tid); 1807 msg->hdr.tid = cpu_to_le64(req->r_tid);
1804 1808
1805 head = msg->front.iov_base; 1809 head = msg->front.iov_base;
@@ -1836,6 +1840,9 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
1836 mds, req->r_old_inode_drop, req->r_old_inode_unless, 0); 1840 mds, req->r_old_inode_drop, req->r_old_inode_unless, 0);
1837 head->num_releases = cpu_to_le16(releases); 1841 head->num_releases = cpu_to_le16(releases);
1838 1842
1843 /* time stamp */
1844 ceph_encode_copy(&p, &req->r_stamp, sizeof(req->r_stamp));
1845
1839 BUG_ON(p > end); 1846 BUG_ON(p > end);
1840 msg->front.iov_len = p - msg->front.iov_base; 1847 msg->front.iov_len = p - msg->front.iov_base;
1841 msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); 1848 msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
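[editor's note] Taken together, the mds_client.c hunks add a client-side timestamp to every MDS request: r_stamp is captured once at ceph_mdsc_create_request() time (so a resend after MDS failover carries the original time rather than the retry time), the front buffer is sized for one extra struct timespec, and hdr.version is bumped to 2 so the MDS knows the trailing field is present. The v2 body then ends up laid out roughly as below; this is a sketch inferred from the length calculation, where each filepath costs 1 + sizeof(u64) + sizeof(u32) plus the path bytes:

	/*
	 * CEPH_MSG_CLIENT_REQUEST, hdr.version = 2 (inferred layout):
	 *
	 *   struct ceph_mds_request_head head;
	 *   filepath path1;      (u8 + u64 ino + u32 len + len bytes)
	 *   filepath path2;
	 *   release items        (head->num_releases of them)
	 *   struct timespec ts;  (req->r_stamp, new in v2)
	 */
	ceph_encode_copy(&p, &req->r_stamp, sizeof(req->r_stamp));
	BUG_ON(p > end);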
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index e90cfccf93bd..e00737cf523c 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -194,6 +194,7 @@ struct ceph_mds_request {
194 int r_fmode; /* file mode, if expecting cap */ 194 int r_fmode; /* file mode, if expecting cap */
195 kuid_t r_uid; 195 kuid_t r_uid;
196 kgid_t r_gid; 196 kgid_t r_gid;
197 struct timespec r_stamp;
197 198
198 /* for choosing which mds to send this request to */ 199 /* for choosing which mds to send this request to */
199 int r_direct_mode; 200 int r_direct_mode;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index ead05cc1f447..12b20744e386 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -292,7 +292,6 @@ struct ceph_inode_info {
292 struct ceph_snap_context *i_head_snapc; /* set if wr_buffer_head > 0 or 292 struct ceph_snap_context *i_head_snapc; /* set if wr_buffer_head > 0 or
293 dirty|flushing caps */ 293 dirty|flushing caps */
294 unsigned i_snap_caps; /* cap bits for snapped files */ 294 unsigned i_snap_caps; /* cap bits for snapped files */
295 unsigned i_cap_exporting_issued;
296 295
297 int i_nr_by_mode[CEPH_FILE_MODE_NUM]; /* open file counts */ 296 int i_nr_by_mode[CEPH_FILE_MODE_NUM]; /* open file counts */
298 297
@@ -775,11 +774,13 @@ static inline void ceph_forget_all_cached_acls(struct inode *inode)
775extern const char *ceph_cap_string(int c); 774extern const char *ceph_cap_string(int c);
776extern void ceph_handle_caps(struct ceph_mds_session *session, 775extern void ceph_handle_caps(struct ceph_mds_session *session,
777 struct ceph_msg *msg); 776 struct ceph_msg *msg);
778extern int ceph_add_cap(struct inode *inode, 777extern struct ceph_cap *ceph_get_cap(struct ceph_mds_client *mdsc,
779 struct ceph_mds_session *session, u64 cap_id, 778 struct ceph_cap_reservation *ctx);
780 int fmode, unsigned issued, unsigned wanted, 779extern void ceph_add_cap(struct inode *inode,
781 unsigned cap, unsigned seq, u64 realmino, int flags, 780 struct ceph_mds_session *session, u64 cap_id,
782 struct ceph_cap_reservation *caps_reservation); 781 int fmode, unsigned issued, unsigned wanted,
782 unsigned cap, unsigned seq, u64 realmino, int flags,
783 struct ceph_cap **new_cap);
783extern void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release); 784extern void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release);
784extern void ceph_put_cap(struct ceph_mds_client *mdsc, 785extern void ceph_put_cap(struct ceph_mds_client *mdsc,
785 struct ceph_cap *cap); 786 struct ceph_cap *cap);
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 1e5b45359509..d08e079ea5d3 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -617,6 +617,11 @@ static void retry_failed_sctp_send(struct connection *recv_con,
617 int nodeid = sn_send_failed->ssf_info.sinfo_ppid; 617 int nodeid = sn_send_failed->ssf_info.sinfo_ppid;
618 618
619 log_print("Retry sending %d bytes to node id %d", len, nodeid); 619 log_print("Retry sending %d bytes to node id %d", len, nodeid);
620
621 if (!nodeid) {
622 log_print("Shouldn't resend data via listening connection.");
623 return;
624 }
620 625
621 con = nodeid2con(nodeid, 0); 626 con = nodeid2con(nodeid, 0);
622 if (!con) { 627 if (!con) {
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index b73e0621ce9e..b10b48c2a7af 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -910,7 +910,7 @@ static const struct file_operations eventpoll_fops = {
910void eventpoll_release_file(struct file *file) 910void eventpoll_release_file(struct file *file)
911{ 911{
912 struct eventpoll *ep; 912 struct eventpoll *ep;
913 struct epitem *epi; 913 struct epitem *epi, *next;
914 914
915 /* 915 /*
916 * We don't want to get "file->f_lock" because it is not 916 * We don't want to get "file->f_lock" because it is not
@@ -926,7 +926,7 @@ void eventpoll_release_file(struct file *file)
926 * Besides, ep_remove() acquires the lock, so we can't hold it here. 926 * Besides, ep_remove() acquires the lock, so we can't hold it here.
927 */ 927 */
928 mutex_lock(&epmutex); 928 mutex_lock(&epmutex);
929 list_for_each_entry_rcu(epi, &file->f_ep_links, fllink) { 929 list_for_each_entry_safe(epi, next, &file->f_ep_links, fllink) {
930 ep = epi->ep; 930 ep = epi->ep;
931 mutex_lock_nested(&ep->mtx, 0); 931 mutex_lock_nested(&ep->mtx, 0);
932 ep_remove(ep, epi); 932 ep_remove(ep, epi);
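[editor's note] The eventpoll fix swaps list_for_each_entry_rcu() for list_for_each_entry_safe(): ep_remove() unlinks and frees epi, so the previous iterator had to read epi->fllink.next out of freed memory to take its next step (and the RCU variant was the wrong tool here anyway: the walk runs under epmutex rather than rcu_read_lock, and the entries are freed immediately, not RCU-deferred). The _safe variant caches the successor before the loop body runs, roughly as in <linux/list.h> of this era:

	#define list_for_each_entry_safe(pos, n, head, member)			\
		for (pos = list_first_entry(head, typeof(*pos), member),	\
			n = list_next_entry(pos, member);			\
		     &pos->member != (head);					\
		     pos = n, n = list_next_entry(n, member))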