about summary refs log tree commit diff stats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r-- fs/binfmt_misc.c 7
-rw-r--r-- fs/btrfs/ctree.h 4
-rw-r--r-- fs/btrfs/disk-io.c 6
-rw-r--r-- fs/btrfs/extent-tree.c 23
-rw-r--r-- fs/btrfs/free-space-cache.c 12
-rw-r--r-- fs/btrfs/volumes.c 2
-rw-r--r-- fs/ceph/addr.c 273
-rw-r--r-- fs/ceph/caps.c 132
-rw-r--r-- fs/ceph/dir.c 27
-rw-r--r-- fs/ceph/file.c 97
-rw-r--r-- fs/ceph/inode.c 59
-rw-r--r-- fs/ceph/locks.c 64
-rw-r--r-- fs/ceph/mds_client.c 41
-rw-r--r-- fs/ceph/mds_client.h 10
-rw-r--r-- fs/ceph/snap.c 37
-rw-r--r-- fs/ceph/super.c 16
-rw-r--r-- fs/ceph/super.h 55
-rw-r--r-- fs/ceph/xattr.c 7
-rw-r--r-- fs/ecryptfs/crypto.c 1
-rw-r--r-- fs/ecryptfs/file.c 12
-rw-r--r-- fs/ecryptfs/keystore.c 6
-rw-r--r-- fs/ecryptfs/main.c 16
-rw-r--r-- fs/ext4/move_extent.c 4
-rw-r--r-- fs/fuse/cuse.c 2
-rw-r--r-- fs/fuse/dev.c 29
-rw-r--r-- fs/fuse/dir.c 538
-rw-r--r-- fs/fuse/file.c 230
-rw-r--r-- fs/fuse/fuse_i.h 45
-rw-r--r-- fs/fuse/inode.c 39
-rw-r--r-- fs/hfsplus/catalog.c 89
-rw-r--r-- fs/hfsplus/dir.c 11
-rw-r--r-- fs/hfsplus/hfsplus_fs.h 4
-rw-r--r-- fs/hfsplus/super.c 4
-rw-r--r-- fs/jffs2/readinode.c 2
-rw-r--r-- fs/jffs2/summary.c 1
-rw-r--r-- fs/kernfs/file.c 22
-rw-r--r-- fs/namespace.c 20
-rw-r--r-- fs/ocfs2/alloc.c 28
-rw-r--r-- fs/ocfs2/alloc.h 2
-rw-r--r-- fs/ocfs2/aops.c 16
-rw-r--r-- fs/ocfs2/dir.c 2
-rw-r--r-- fs/ocfs2/dlm/dlmmaster.c 12
-rw-r--r-- fs/ocfs2/file.c 2
-rw-r--r-- fs/pnode.c 1
-rw-r--r-- fs/proc/base.c 53
-rw-r--r-- fs/proc/meminfo.c 15
-rw-r--r-- fs/proc/stat.c 2
-rw-r--r-- fs/proc_namespace.c 16
48 files changed, 1320 insertions, 776 deletions
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index c04ef1d4f18a..97aff2879cda 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -254,6 +254,7 @@ static char *scanarg(char *s, char del)
254 return NULL; 254 return NULL;
255 } 255 }
256 } 256 }
257 s[-1] ='\0';
257 return s; 258 return s;
258} 259}
259 260
@@ -378,8 +379,7 @@ static Node *create_entry(const char __user *buffer, size_t count)
378 p = scanarg(p, del); 379 p = scanarg(p, del);
379 if (!p) 380 if (!p)
380 goto einval; 381 goto einval;
381 p[-1] = '\0'; 382 if (!e->magic[0])
382 if (p == e->magic)
383 goto einval; 383 goto einval;
384 if (USE_DEBUG) 384 if (USE_DEBUG)
385 print_hex_dump_bytes( 385 print_hex_dump_bytes(
@@ -391,8 +391,7 @@ static Node *create_entry(const char __user *buffer, size_t count)
391 p = scanarg(p, del); 391 p = scanarg(p, del);
392 if (!p) 392 if (!p)
393 goto einval; 393 goto einval;
394 p[-1] = '\0'; 394 if (!e->mask[0]) {
395 if (p == e->mask) {
396 e->mask = NULL; 395 e->mask = NULL;
397 pr_debug("register: mask[raw]: none\n"); 396 pr_debug("register: mask[raw]: none\n");
398 } else if (USE_DEBUG) 397 } else if (USE_DEBUG)
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index e6fbbd74b716..7e607416755a 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3481,8 +3481,8 @@ void btrfs_put_block_group_cache(struct btrfs_fs_info *info);
3481u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo); 3481u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo);
3482int btrfs_error_unpin_extent_range(struct btrfs_root *root, 3482int btrfs_error_unpin_extent_range(struct btrfs_root *root,
3483 u64 start, u64 end); 3483 u64 start, u64 end);
3484int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr, 3484int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
3485 u64 num_bytes, u64 *actual_bytes); 3485 u64 num_bytes, u64 *actual_bytes);
3486int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, 3486int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
3487 struct btrfs_root *root, u64 type); 3487 struct btrfs_root *root, u64 type);
3488int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range); 3488int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 30965120772b..8c63419a7f70 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -4121,12 +4121,6 @@ again:
4121 if (ret) 4121 if (ret)
4122 break; 4122 break;
4123 4123
4124 /* opt_discard */
4125 if (btrfs_test_opt(root, DISCARD))
4126 ret = btrfs_error_discard_extent(root, start,
4127 end + 1 - start,
4128 NULL);
4129
4130 clear_extent_dirty(unpin, start, end, GFP_NOFS); 4124 clear_extent_dirty(unpin, start, end, GFP_NOFS);
4131 btrfs_error_unpin_extent_range(root, start, end); 4125 btrfs_error_unpin_extent_range(root, start, end);
4132 cond_resched(); 4126 cond_resched();
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 222d6aea4a8a..a80b97100d90 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1889,8 +1889,8 @@ static int btrfs_issue_discard(struct block_device *bdev,
1889 return blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_NOFS, 0); 1889 return blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_NOFS, 0);
1890} 1890}
1891 1891
1892static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, 1892int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
1893 u64 num_bytes, u64 *actual_bytes) 1893 u64 num_bytes, u64 *actual_bytes)
1894{ 1894{
1895 int ret; 1895 int ret;
1896 u64 discarded_bytes = 0; 1896 u64 discarded_bytes = 0;
@@ -5727,7 +5727,8 @@ void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
5727 update_global_block_rsv(fs_info); 5727 update_global_block_rsv(fs_info);
5728} 5728}
5729 5729
5730static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) 5730static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end,
5731 const bool return_free_space)
5731{ 5732{
5732 struct btrfs_fs_info *fs_info = root->fs_info; 5733 struct btrfs_fs_info *fs_info = root->fs_info;
5733 struct btrfs_block_group_cache *cache = NULL; 5734 struct btrfs_block_group_cache *cache = NULL;
@@ -5751,7 +5752,8 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
5751 5752
5752 if (start < cache->last_byte_to_unpin) { 5753 if (start < cache->last_byte_to_unpin) {
5753 len = min(len, cache->last_byte_to_unpin - start); 5754 len = min(len, cache->last_byte_to_unpin - start);
5754 btrfs_add_free_space(cache, start, len); 5755 if (return_free_space)
5756 btrfs_add_free_space(cache, start, len);
5755 } 5757 }
5756 5758
5757 start += len; 5759 start += len;
@@ -5815,7 +5817,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
5815 end + 1 - start, NULL); 5817 end + 1 - start, NULL);
5816 5818
5817 clear_extent_dirty(unpin, start, end, GFP_NOFS); 5819 clear_extent_dirty(unpin, start, end, GFP_NOFS);
5818 unpin_extent_range(root, start, end); 5820 unpin_extent_range(root, start, end, true);
5819 cond_resched(); 5821 cond_resched();
5820 } 5822 }
5821 5823
@@ -8872,6 +8874,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
8872 cache_node); 8874 cache_node);
8873 rb_erase(&block_group->cache_node, 8875 rb_erase(&block_group->cache_node,
8874 &info->block_group_cache_tree); 8876 &info->block_group_cache_tree);
8877 RB_CLEAR_NODE(&block_group->cache_node);
8875 spin_unlock(&info->block_group_cache_lock); 8878 spin_unlock(&info->block_group_cache_lock);
8876 8879
8877 down_write(&block_group->space_info->groups_sem); 8880 down_write(&block_group->space_info->groups_sem);
@@ -9130,6 +9133,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
9130 spin_lock(&info->block_group_cache_lock); 9133 spin_lock(&info->block_group_cache_lock);
9131 rb_erase(&cache->cache_node, 9134 rb_erase(&cache->cache_node,
9132 &info->block_group_cache_tree); 9135 &info->block_group_cache_tree);
9136 RB_CLEAR_NODE(&cache->cache_node);
9133 spin_unlock(&info->block_group_cache_lock); 9137 spin_unlock(&info->block_group_cache_lock);
9134 btrfs_put_block_group(cache); 9138 btrfs_put_block_group(cache);
9135 goto error; 9139 goto error;
@@ -9271,6 +9275,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
9271 spin_lock(&root->fs_info->block_group_cache_lock); 9275 spin_lock(&root->fs_info->block_group_cache_lock);
9272 rb_erase(&cache->cache_node, 9276 rb_erase(&cache->cache_node,
9273 &root->fs_info->block_group_cache_tree); 9277 &root->fs_info->block_group_cache_tree);
9278 RB_CLEAR_NODE(&cache->cache_node);
9274 spin_unlock(&root->fs_info->block_group_cache_lock); 9279 spin_unlock(&root->fs_info->block_group_cache_lock);
9275 btrfs_put_block_group(cache); 9280 btrfs_put_block_group(cache);
9276 return ret; 9281 return ret;
@@ -9690,13 +9695,7 @@ out:
9690 9695
9691int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) 9696int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
9692{ 9697{
9693 return unpin_extent_range(root, start, end); 9698 return unpin_extent_range(root, start, end, false);
9694}
9695
9696int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr,
9697 u64 num_bytes, u64 *actual_bytes)
9698{
9699 return btrfs_discard_extent(root, bytenr, num_bytes, actual_bytes);
9700} 9699}
9701 9700
9702int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range) 9701int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range)
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 030847bf7cec..d6c03f7f136b 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -2966,8 +2966,8 @@ static int do_trimming(struct btrfs_block_group_cache *block_group,
2966 spin_unlock(&block_group->lock); 2966 spin_unlock(&block_group->lock);
2967 spin_unlock(&space_info->lock); 2967 spin_unlock(&space_info->lock);
2968 2968
2969 ret = btrfs_error_discard_extent(fs_info->extent_root, 2969 ret = btrfs_discard_extent(fs_info->extent_root,
2970 start, bytes, &trimmed); 2970 start, bytes, &trimmed);
2971 if (!ret) 2971 if (!ret)
2972 *total_trimmed += trimmed; 2972 *total_trimmed += trimmed;
2973 2973
@@ -3185,16 +3185,18 @@ out:
3185 3185
3186 spin_unlock(&block_group->lock); 3186 spin_unlock(&block_group->lock);
3187 3187
3188 lock_chunks(block_group->fs_info->chunk_root);
3188 em_tree = &block_group->fs_info->mapping_tree.map_tree; 3189 em_tree = &block_group->fs_info->mapping_tree.map_tree;
3189 write_lock(&em_tree->lock); 3190 write_lock(&em_tree->lock);
3190 em = lookup_extent_mapping(em_tree, block_group->key.objectid, 3191 em = lookup_extent_mapping(em_tree, block_group->key.objectid,
3191 1); 3192 1);
3192 BUG_ON(!em); /* logic error, can't happen */ 3193 BUG_ON(!em); /* logic error, can't happen */
3194 /*
3195 * remove_extent_mapping() will delete us from the pinned_chunks
3196 * list, which is protected by the chunk mutex.
3197 */
3193 remove_extent_mapping(em_tree, em); 3198 remove_extent_mapping(em_tree, em);
3194 write_unlock(&em_tree->lock); 3199 write_unlock(&em_tree->lock);
3195
3196 lock_chunks(block_group->fs_info->chunk_root);
3197 list_del_init(&em->list);
3198 unlock_chunks(block_group->fs_info->chunk_root); 3200 unlock_chunks(block_group->fs_info->chunk_root);
3199 3201
3200 /* once for us and once for the tree */ 3202 /* once for us and once for the tree */
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 0144790e296e..50c5a8762aed 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1485,7 +1485,7 @@ static void update_dev_time(char *path_name)
1485 struct file *filp; 1485 struct file *filp;
1486 1486
1487 filp = filp_open(path_name, O_RDWR, 0); 1487 filp = filp_open(path_name, O_RDWR, 0);
1488 if (!filp) 1488 if (IS_ERR(filp))
1489 return; 1489 return;
1490 file_update_time(filp); 1490 file_update_time(filp);
1491 filp_close(filp, NULL); 1491 filp_close(filp, NULL);
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 18c06bbaf136..f5013d92a7e6 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -192,17 +192,30 @@ static int readpage_nounlock(struct file *filp, struct page *page)
192 struct ceph_osd_client *osdc = 192 struct ceph_osd_client *osdc =
193 &ceph_inode_to_client(inode)->client->osdc; 193 &ceph_inode_to_client(inode)->client->osdc;
194 int err = 0; 194 int err = 0;
195 u64 off = page_offset(page);
195 u64 len = PAGE_CACHE_SIZE; 196 u64 len = PAGE_CACHE_SIZE;
196 197
197 err = ceph_readpage_from_fscache(inode, page); 198 if (off >= i_size_read(inode)) {
199 zero_user_segment(page, err, PAGE_CACHE_SIZE);
200 SetPageUptodate(page);
201 return 0;
202 }
198 203
204 /*
205 * Uptodate inline data should have been added into page cache
206 * while getting Fcr caps.
207 */
208 if (ci->i_inline_version != CEPH_INLINE_NONE)
209 return -EINVAL;
210
211 err = ceph_readpage_from_fscache(inode, page);
199 if (err == 0) 212 if (err == 0)
200 goto out; 213 goto out;
201 214
202 dout("readpage inode %p file %p page %p index %lu\n", 215 dout("readpage inode %p file %p page %p index %lu\n",
203 inode, filp, page, page->index); 216 inode, filp, page, page->index);
204 err = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout, 217 err = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout,
205 (u64) page_offset(page), &len, 218 off, &len,
206 ci->i_truncate_seq, ci->i_truncate_size, 219 ci->i_truncate_seq, ci->i_truncate_size,
207 &page, 1, 0); 220 &page, 1, 0);
208 if (err == -ENOENT) 221 if (err == -ENOENT)
@@ -319,7 +332,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max)
319 off, len); 332 off, len);
320 vino = ceph_vino(inode); 333 vino = ceph_vino(inode);
321 req = ceph_osdc_new_request(osdc, &ci->i_layout, vino, off, &len, 334 req = ceph_osdc_new_request(osdc, &ci->i_layout, vino, off, &len,
322 1, CEPH_OSD_OP_READ, 335 0, 1, CEPH_OSD_OP_READ,
323 CEPH_OSD_FLAG_READ, NULL, 336 CEPH_OSD_FLAG_READ, NULL,
324 ci->i_truncate_seq, ci->i_truncate_size, 337 ci->i_truncate_seq, ci->i_truncate_size,
325 false); 338 false);
@@ -384,6 +397,9 @@ static int ceph_readpages(struct file *file, struct address_space *mapping,
384 int rc = 0; 397 int rc = 0;
385 int max = 0; 398 int max = 0;
386 399
400 if (ceph_inode(inode)->i_inline_version != CEPH_INLINE_NONE)
401 return -EINVAL;
402
387 rc = ceph_readpages_from_fscache(mapping->host, mapping, page_list, 403 rc = ceph_readpages_from_fscache(mapping->host, mapping, page_list,
388 &nr_pages); 404 &nr_pages);
389 405
@@ -673,7 +689,7 @@ static int ceph_writepages_start(struct address_space *mapping,
673 int rc = 0; 689 int rc = 0;
674 unsigned wsize = 1 << inode->i_blkbits; 690 unsigned wsize = 1 << inode->i_blkbits;
675 struct ceph_osd_request *req = NULL; 691 struct ceph_osd_request *req = NULL;
676 int do_sync; 692 int do_sync = 0;
677 u64 truncate_size, snap_size; 693 u64 truncate_size, snap_size;
678 u32 truncate_seq; 694 u32 truncate_seq;
679 695
@@ -750,7 +766,6 @@ retry:
750 last_snapc = snapc; 766 last_snapc = snapc;
751 767
752 while (!done && index <= end) { 768 while (!done && index <= end) {
753 int num_ops = do_sync ? 2 : 1;
754 unsigned i; 769 unsigned i;
755 int first; 770 int first;
756 pgoff_t next; 771 pgoff_t next;
@@ -850,7 +865,8 @@ get_more_pages:
850 len = wsize; 865 len = wsize;
851 req = ceph_osdc_new_request(&fsc->client->osdc, 866 req = ceph_osdc_new_request(&fsc->client->osdc,
852 &ci->i_layout, vino, 867 &ci->i_layout, vino,
853 offset, &len, num_ops, 868 offset, &len, 0,
869 do_sync ? 2 : 1,
854 CEPH_OSD_OP_WRITE, 870 CEPH_OSD_OP_WRITE,
855 CEPH_OSD_FLAG_WRITE | 871 CEPH_OSD_FLAG_WRITE |
856 CEPH_OSD_FLAG_ONDISK, 872 CEPH_OSD_FLAG_ONDISK,
@@ -862,6 +878,9 @@ get_more_pages:
862 break; 878 break;
863 } 879 }
864 880
881 if (do_sync)
882 osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC);
883
865 req->r_callback = writepages_finish; 884 req->r_callback = writepages_finish;
866 req->r_inode = inode; 885 req->r_inode = inode;
867 886
@@ -1204,6 +1223,7 @@ static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1204 struct inode *inode = file_inode(vma->vm_file); 1223 struct inode *inode = file_inode(vma->vm_file);
1205 struct ceph_inode_info *ci = ceph_inode(inode); 1224 struct ceph_inode_info *ci = ceph_inode(inode);
1206 struct ceph_file_info *fi = vma->vm_file->private_data; 1225 struct ceph_file_info *fi = vma->vm_file->private_data;
1226 struct page *pinned_page = NULL;
1207 loff_t off = vmf->pgoff << PAGE_CACHE_SHIFT; 1227 loff_t off = vmf->pgoff << PAGE_CACHE_SHIFT;
1208 int want, got, ret; 1228 int want, got, ret;
1209 1229
@@ -1215,7 +1235,8 @@ static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1215 want = CEPH_CAP_FILE_CACHE; 1235 want = CEPH_CAP_FILE_CACHE;
1216 while (1) { 1236 while (1) {
1217 got = 0; 1237 got = 0;
1218 ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, &got, -1); 1238 ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want,
1239 -1, &got, &pinned_page);
1219 if (ret == 0) 1240 if (ret == 0)
1220 break; 1241 break;
1221 if (ret != -ERESTARTSYS) { 1242 if (ret != -ERESTARTSYS) {
@@ -1226,12 +1247,54 @@ static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1226 dout("filemap_fault %p %llu~%zd got cap refs on %s\n", 1247 dout("filemap_fault %p %llu~%zd got cap refs on %s\n",
1227 inode, off, (size_t)PAGE_CACHE_SIZE, ceph_cap_string(got)); 1248 inode, off, (size_t)PAGE_CACHE_SIZE, ceph_cap_string(got));
1228 1249
1229 ret = filemap_fault(vma, vmf); 1250 if ((got & (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO)) ||
1251 ci->i_inline_version == CEPH_INLINE_NONE)
1252 ret = filemap_fault(vma, vmf);
1253 else
1254 ret = -EAGAIN;
1230 1255
1231 dout("filemap_fault %p %llu~%zd dropping cap refs on %s ret %d\n", 1256 dout("filemap_fault %p %llu~%zd dropping cap refs on %s ret %d\n",
1232 inode, off, (size_t)PAGE_CACHE_SIZE, ceph_cap_string(got), ret); 1257 inode, off, (size_t)PAGE_CACHE_SIZE, ceph_cap_string(got), ret);
1258 if (pinned_page)
1259 page_cache_release(pinned_page);
1233 ceph_put_cap_refs(ci, got); 1260 ceph_put_cap_refs(ci, got);
1234 1261
1262 if (ret != -EAGAIN)
1263 return ret;
1264
1265 /* read inline data */
1266 if (off >= PAGE_CACHE_SIZE) {
1267 /* does not support inline data > PAGE_SIZE */
1268 ret = VM_FAULT_SIGBUS;
1269 } else {
1270 int ret1;
1271 struct address_space *mapping = inode->i_mapping;
1272 struct page *page = find_or_create_page(mapping, 0,
1273 mapping_gfp_mask(mapping) &
1274 ~__GFP_FS);
1275 if (!page) {
1276 ret = VM_FAULT_OOM;
1277 goto out;
1278 }
1279 ret1 = __ceph_do_getattr(inode, page,
1280 CEPH_STAT_CAP_INLINE_DATA, true);
1281 if (ret1 < 0 || off >= i_size_read(inode)) {
1282 unlock_page(page);
1283 page_cache_release(page);
1284 ret = VM_FAULT_SIGBUS;
1285 goto out;
1286 }
1287 if (ret1 < PAGE_CACHE_SIZE)
1288 zero_user_segment(page, ret1, PAGE_CACHE_SIZE);
1289 else
1290 flush_dcache_page(page);
1291 SetPageUptodate(page);
1292 vmf->page = page;
1293 ret = VM_FAULT_MAJOR | VM_FAULT_LOCKED;
1294 }
1295out:
1296 dout("filemap_fault %p %llu~%zd read inline data ret %d\n",
1297 inode, off, (size_t)PAGE_CACHE_SIZE, ret);
1235 return ret; 1298 return ret;
1236} 1299}
1237 1300
@@ -1250,6 +1313,19 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
1250 size_t len; 1313 size_t len;
1251 int want, got, ret; 1314 int want, got, ret;
1252 1315
1316 if (ci->i_inline_version != CEPH_INLINE_NONE) {
1317 struct page *locked_page = NULL;
1318 if (off == 0) {
1319 lock_page(page);
1320 locked_page = page;
1321 }
1322 ret = ceph_uninline_data(vma->vm_file, locked_page);
1323 if (locked_page)
1324 unlock_page(locked_page);
1325 if (ret < 0)
1326 return VM_FAULT_SIGBUS;
1327 }
1328
1253 if (off + PAGE_CACHE_SIZE <= size) 1329 if (off + PAGE_CACHE_SIZE <= size)
1254 len = PAGE_CACHE_SIZE; 1330 len = PAGE_CACHE_SIZE;
1255 else 1331 else
@@ -1263,7 +1339,8 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
1263 want = CEPH_CAP_FILE_BUFFER; 1339 want = CEPH_CAP_FILE_BUFFER;
1264 while (1) { 1340 while (1) {
1265 got = 0; 1341 got = 0;
1266 ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, off + len); 1342 ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, off + len,
1343 &got, NULL);
1267 if (ret == 0) 1344 if (ret == 0)
1268 break; 1345 break;
1269 if (ret != -ERESTARTSYS) { 1346 if (ret != -ERESTARTSYS) {
@@ -1297,11 +1374,13 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
1297 ret = VM_FAULT_SIGBUS; 1374 ret = VM_FAULT_SIGBUS;
1298 } 1375 }
1299out: 1376out:
1300 if (ret != VM_FAULT_LOCKED) { 1377 if (ret != VM_FAULT_LOCKED)
1301 unlock_page(page); 1378 unlock_page(page);
1302 } else { 1379 if (ret == VM_FAULT_LOCKED ||
1380 ci->i_inline_version != CEPH_INLINE_NONE) {
1303 int dirty; 1381 int dirty;
1304 spin_lock(&ci->i_ceph_lock); 1382 spin_lock(&ci->i_ceph_lock);
1383 ci->i_inline_version = CEPH_INLINE_NONE;
1305 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR); 1384 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
1306 spin_unlock(&ci->i_ceph_lock); 1385 spin_unlock(&ci->i_ceph_lock);
1307 if (dirty) 1386 if (dirty)
@@ -1315,6 +1394,178 @@ out:
1315 return ret; 1394 return ret;
1316} 1395}
1317 1396
1397void ceph_fill_inline_data(struct inode *inode, struct page *locked_page,
1398 char *data, size_t len)
1399{
1400 struct address_space *mapping = inode->i_mapping;
1401 struct page *page;
1402
1403 if (locked_page) {
1404 page = locked_page;
1405 } else {
1406 if (i_size_read(inode) == 0)
1407 return;
1408 page = find_or_create_page(mapping, 0,
1409 mapping_gfp_mask(mapping) & ~__GFP_FS);
1410 if (!page)
1411 return;
1412 if (PageUptodate(page)) {
1413 unlock_page(page);
1414 page_cache_release(page);
1415 return;
1416 }
1417 }
1418
1419 dout("fill_inline_data %p %llx.%llx len %lu locked_page %p\n",
1420 inode, ceph_vinop(inode), len, locked_page);
1421
1422 if (len > 0) {
1423 void *kaddr = kmap_atomic(page);
1424 memcpy(kaddr, data, len);
1425 kunmap_atomic(kaddr);
1426 }
1427
1428 if (page != locked_page) {
1429 if (len < PAGE_CACHE_SIZE)
1430 zero_user_segment(page, len, PAGE_CACHE_SIZE);
1431 else
1432 flush_dcache_page(page);
1433
1434 SetPageUptodate(page);
1435 unlock_page(page);
1436 page_cache_release(page);
1437 }
1438}
1439
1440int ceph_uninline_data(struct file *filp, struct page *locked_page)
1441{
1442 struct inode *inode = file_inode(filp);
1443 struct ceph_inode_info *ci = ceph_inode(inode);
1444 struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
1445 struct ceph_osd_request *req;
1446 struct page *page = NULL;
1447 u64 len, inline_version;
1448 int err = 0;
1449 bool from_pagecache = false;
1450
1451 spin_lock(&ci->i_ceph_lock);
1452 inline_version = ci->i_inline_version;
1453 spin_unlock(&ci->i_ceph_lock);
1454
1455 dout("uninline_data %p %llx.%llx inline_version %llu\n",
1456 inode, ceph_vinop(inode), inline_version);
1457
1458 if (inline_version == 1 || /* initial version, no data */
1459 inline_version == CEPH_INLINE_NONE)
1460 goto out;
1461
1462 if (locked_page) {
1463 page = locked_page;
1464 WARN_ON(!PageUptodate(page));
1465 } else if (ceph_caps_issued(ci) &
1466 (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) {
1467 page = find_get_page(inode->i_mapping, 0);
1468 if (page) {
1469 if (PageUptodate(page)) {
1470 from_pagecache = true;
1471 lock_page(page);
1472 } else {
1473 page_cache_release(page);
1474 page = NULL;
1475 }
1476 }
1477 }
1478
1479 if (page) {
1480 len = i_size_read(inode);
1481 if (len > PAGE_CACHE_SIZE)
1482 len = PAGE_CACHE_SIZE;
1483 } else {
1484 page = __page_cache_alloc(GFP_NOFS);
1485 if (!page) {
1486 err = -ENOMEM;
1487 goto out;
1488 }
1489 err = __ceph_do_getattr(inode, page,
1490 CEPH_STAT_CAP_INLINE_DATA, true);
1491 if (err < 0) {
1492 /* no inline data */
1493 if (err == -ENODATA)
1494 err = 0;
1495 goto out;
1496 }
1497 len = err;
1498 }
1499
1500 req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
1501 ceph_vino(inode), 0, &len, 0, 1,
1502 CEPH_OSD_OP_CREATE,
1503 CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE,
1504 ci->i_snap_realm->cached_context,
1505 0, 0, false);
1506 if (IS_ERR(req)) {
1507 err = PTR_ERR(req);
1508 goto out;
1509 }
1510
1511 ceph_osdc_build_request(req, 0, NULL, CEPH_NOSNAP, &inode->i_mtime);
1512 err = ceph_osdc_start_request(&fsc->client->osdc, req, false);
1513 if (!err)
1514 err = ceph_osdc_wait_request(&fsc->client->osdc, req);
1515 ceph_osdc_put_request(req);
1516 if (err < 0)
1517 goto out;
1518
1519 req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
1520 ceph_vino(inode), 0, &len, 1, 3,
1521 CEPH_OSD_OP_WRITE,
1522 CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE,
1523 ci->i_snap_realm->cached_context,
1524 ci->i_truncate_seq, ci->i_truncate_size,
1525 false);
1526 if (IS_ERR(req)) {
1527 err = PTR_ERR(req);
1528 goto out;
1529 }
1530
1531 osd_req_op_extent_osd_data_pages(req, 1, &page, len, 0, false, false);
1532
1533 err = osd_req_op_xattr_init(req, 0, CEPH_OSD_OP_CMPXATTR,
1534 "inline_version", &inline_version,
1535 sizeof(inline_version),
1536 CEPH_OSD_CMPXATTR_OP_GT,
1537 CEPH_OSD_CMPXATTR_MODE_U64);
1538 if (err)
1539 goto out_put;
1540
1541 err = osd_req_op_xattr_init(req, 2, CEPH_OSD_OP_SETXATTR,
1542 "inline_version", &inline_version,
1543 sizeof(inline_version), 0, 0);
1544 if (err)
1545 goto out_put;
1546
1547 ceph_osdc_build_request(req, 0, NULL, CEPH_NOSNAP, &inode->i_mtime);
1548 err = ceph_osdc_start_request(&fsc->client->osdc, req, false);
1549 if (!err)
1550 err = ceph_osdc_wait_request(&fsc->client->osdc, req);
1551out_put:
1552 ceph_osdc_put_request(req);
1553 if (err == -ECANCELED)
1554 err = 0;
1555out:
1556 if (page && page != locked_page) {
1557 if (from_pagecache) {
1558 unlock_page(page);
1559 page_cache_release(page);
1560 } else
1561 __free_pages(page, 0);
1562 }
1563
1564 dout("uninline_data %p %llx.%llx inline_version %llu = %d\n",
1565 inode, ceph_vinop(inode), inline_version, err);
1566 return err;
1567}
1568
1318static struct vm_operations_struct ceph_vmops = { 1569static struct vm_operations_struct ceph_vmops = {
1319 .fault = ceph_filemap_fault, 1570 .fault = ceph_filemap_fault,
1320 .page_mkwrite = ceph_page_mkwrite, 1571 .page_mkwrite = ceph_page_mkwrite,
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index cefca661464b..b93c631c6c87 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -975,10 +975,12 @@ static int send_cap_msg(struct ceph_mds_session *session,
975 kuid_t uid, kgid_t gid, umode_t mode, 975 kuid_t uid, kgid_t gid, umode_t mode,
976 u64 xattr_version, 976 u64 xattr_version,
977 struct ceph_buffer *xattrs_buf, 977 struct ceph_buffer *xattrs_buf,
978 u64 follows) 978 u64 follows, bool inline_data)
979{ 979{
980 struct ceph_mds_caps *fc; 980 struct ceph_mds_caps *fc;
981 struct ceph_msg *msg; 981 struct ceph_msg *msg;
982 void *p;
983 size_t extra_len;
982 984
983 dout("send_cap_msg %s %llx %llx caps %s wanted %s dirty %s" 985 dout("send_cap_msg %s %llx %llx caps %s wanted %s dirty %s"
984 " seq %u/%u mseq %u follows %lld size %llu/%llu" 986 " seq %u/%u mseq %u follows %lld size %llu/%llu"
@@ -988,7 +990,10 @@ static int send_cap_msg(struct ceph_mds_session *session,
988 seq, issue_seq, mseq, follows, size, max_size, 990 seq, issue_seq, mseq, follows, size, max_size,
989 xattr_version, xattrs_buf ? (int)xattrs_buf->vec.iov_len : 0); 991 xattr_version, xattrs_buf ? (int)xattrs_buf->vec.iov_len : 0);
990 992
991 msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, sizeof(*fc), GFP_NOFS, false); 993 /* flock buffer size + inline version + inline data size */
994 extra_len = 4 + 8 + 4;
995 msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, sizeof(*fc) + extra_len,
996 GFP_NOFS, false);
992 if (!msg) 997 if (!msg)
993 return -ENOMEM; 998 return -ENOMEM;
994 999
@@ -1020,6 +1025,14 @@ static int send_cap_msg(struct ceph_mds_session *session,
1020 fc->gid = cpu_to_le32(from_kgid(&init_user_ns, gid)); 1025 fc->gid = cpu_to_le32(from_kgid(&init_user_ns, gid));
1021 fc->mode = cpu_to_le32(mode); 1026 fc->mode = cpu_to_le32(mode);
1022 1027
1028 p = fc + 1;
1029 /* flock buffer size */
1030 ceph_encode_32(&p, 0);
1031 /* inline version */
1032 ceph_encode_64(&p, inline_data ? 0 : CEPH_INLINE_NONE);
1033 /* inline data size */
1034 ceph_encode_32(&p, 0);
1035
1023 fc->xattr_version = cpu_to_le64(xattr_version); 1036 fc->xattr_version = cpu_to_le64(xattr_version);
1024 if (xattrs_buf) { 1037 if (xattrs_buf) {
1025 msg->middle = ceph_buffer_get(xattrs_buf); 1038 msg->middle = ceph_buffer_get(xattrs_buf);
@@ -1126,6 +1139,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
1126 u64 flush_tid = 0; 1139 u64 flush_tid = 0;
1127 int i; 1140 int i;
1128 int ret; 1141 int ret;
1142 bool inline_data;
1129 1143
1130 held = cap->issued | cap->implemented; 1144 held = cap->issued | cap->implemented;
1131 revoking = cap->implemented & ~cap->issued; 1145 revoking = cap->implemented & ~cap->issued;
@@ -1209,13 +1223,15 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
1209 xattr_version = ci->i_xattrs.version; 1223 xattr_version = ci->i_xattrs.version;
1210 } 1224 }
1211 1225
1226 inline_data = ci->i_inline_version != CEPH_INLINE_NONE;
1227
1212 spin_unlock(&ci->i_ceph_lock); 1228 spin_unlock(&ci->i_ceph_lock);
1213 1229
1214 ret = send_cap_msg(session, ceph_vino(inode).ino, cap_id, 1230 ret = send_cap_msg(session, ceph_vino(inode).ino, cap_id,
1215 op, keep, want, flushing, seq, flush_tid, issue_seq, mseq, 1231 op, keep, want, flushing, seq, flush_tid, issue_seq, mseq,
1216 size, max_size, &mtime, &atime, time_warp_seq, 1232 size, max_size, &mtime, &atime, time_warp_seq,
1217 uid, gid, mode, xattr_version, xattr_blob, 1233 uid, gid, mode, xattr_version, xattr_blob,
1218 follows); 1234 follows, inline_data);
1219 if (ret < 0) { 1235 if (ret < 0) {
1220 dout("error sending cap msg, must requeue %p\n", inode); 1236 dout("error sending cap msg, must requeue %p\n", inode);
1221 delayed = 1; 1237 delayed = 1;
@@ -1336,7 +1352,7 @@ retry:
1336 capsnap->time_warp_seq, 1352 capsnap->time_warp_seq,
1337 capsnap->uid, capsnap->gid, capsnap->mode, 1353 capsnap->uid, capsnap->gid, capsnap->mode,
1338 capsnap->xattr_version, capsnap->xattr_blob, 1354 capsnap->xattr_version, capsnap->xattr_blob,
1339 capsnap->follows); 1355 capsnap->follows, capsnap->inline_data);
1340 1356
1341 next_follows = capsnap->follows + 1; 1357 next_follows = capsnap->follows + 1;
1342 ceph_put_cap_snap(capsnap); 1358 ceph_put_cap_snap(capsnap);
@@ -2057,15 +2073,17 @@ static void __take_cap_refs(struct ceph_inode_info *ci, int got)
2057 * requested from the MDS. 2073 * requested from the MDS.
2058 */ 2074 */
2059static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, 2075static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
2060 int *got, loff_t endoff, int *check_max, int *err) 2076 loff_t endoff, int *got, struct page **pinned_page,
2077 int *check_max, int *err)
2061{ 2078{
2062 struct inode *inode = &ci->vfs_inode; 2079 struct inode *inode = &ci->vfs_inode;
2063 int ret = 0; 2080 int ret = 0;
2064 int have, implemented; 2081 int have, implemented, _got = 0;
2065 int file_wanted; 2082 int file_wanted;
2066 2083
2067 dout("get_cap_refs %p need %s want %s\n", inode, 2084 dout("get_cap_refs %p need %s want %s\n", inode,
2068 ceph_cap_string(need), ceph_cap_string(want)); 2085 ceph_cap_string(need), ceph_cap_string(want));
2086again:
2069 spin_lock(&ci->i_ceph_lock); 2087 spin_lock(&ci->i_ceph_lock);
2070 2088
2071 /* make sure file is actually open */ 2089 /* make sure file is actually open */
@@ -2075,7 +2093,7 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
2075 ceph_cap_string(need), ceph_cap_string(file_wanted)); 2093 ceph_cap_string(need), ceph_cap_string(file_wanted));
2076 *err = -EBADF; 2094 *err = -EBADF;
2077 ret = 1; 2095 ret = 1;
2078 goto out; 2096 goto out_unlock;
2079 } 2097 }
2080 2098
2081 /* finish pending truncate */ 2099 /* finish pending truncate */
@@ -2095,7 +2113,7 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
2095 *check_max = 1; 2113 *check_max = 1;
2096 ret = 1; 2114 ret = 1;
2097 } 2115 }
2098 goto out; 2116 goto out_unlock;
2099 } 2117 }
2100 /* 2118 /*
2101 * If a sync write is in progress, we must wait, so that we 2119 * If a sync write is in progress, we must wait, so that we
@@ -2103,7 +2121,7 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
2103 */ 2121 */
2104 if (__ceph_have_pending_cap_snap(ci)) { 2122 if (__ceph_have_pending_cap_snap(ci)) {
2105 dout("get_cap_refs %p cap_snap_pending\n", inode); 2123 dout("get_cap_refs %p cap_snap_pending\n", inode);
2106 goto out; 2124 goto out_unlock;
2107 } 2125 }
2108 } 2126 }
2109 2127
@@ -2120,18 +2138,50 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
2120 inode, ceph_cap_string(have), ceph_cap_string(not), 2138 inode, ceph_cap_string(have), ceph_cap_string(not),
2121 ceph_cap_string(revoking)); 2139 ceph_cap_string(revoking));
2122 if ((revoking & not) == 0) { 2140 if ((revoking & not) == 0) {
2123 *got = need | (have & want); 2141 _got = need | (have & want);
2124 __take_cap_refs(ci, *got); 2142 __take_cap_refs(ci, _got);
2125 ret = 1; 2143 ret = 1;
2126 } 2144 }
2127 } else { 2145 } else {
2128 dout("get_cap_refs %p have %s needed %s\n", inode, 2146 dout("get_cap_refs %p have %s needed %s\n", inode,
2129 ceph_cap_string(have), ceph_cap_string(need)); 2147 ceph_cap_string(have), ceph_cap_string(need));
2130 } 2148 }
2131out: 2149out_unlock:
2132 spin_unlock(&ci->i_ceph_lock); 2150 spin_unlock(&ci->i_ceph_lock);
2151
2152 if (ci->i_inline_version != CEPH_INLINE_NONE &&
2153 (_got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) &&
2154 i_size_read(inode) > 0) {
2155 int ret1;
2156 struct page *page = find_get_page(inode->i_mapping, 0);
2157 if (page) {
2158 if (PageUptodate(page)) {
2159 *pinned_page = page;
2160 goto out;
2161 }
2162 page_cache_release(page);
2163 }
2164 /*
2165 * drop cap refs first because getattr while holding
2166 * caps refs can cause deadlock.
2167 */
2168 ceph_put_cap_refs(ci, _got);
2169 _got = 0;
2170
2171 /* getattr request will bring inline data into page cache */
2172 ret1 = __ceph_do_getattr(inode, NULL,
2173 CEPH_STAT_CAP_INLINE_DATA, true);
2174 if (ret1 >= 0) {
2175 ret = 0;
2176 goto again;
2177 }
2178 *err = ret1;
2179 ret = 1;
2180 }
2181out:
2133 dout("get_cap_refs %p ret %d got %s\n", inode, 2182 dout("get_cap_refs %p ret %d got %s\n", inode,
2134 ret, ceph_cap_string(*got)); 2183 ret, ceph_cap_string(_got));
2184 *got = _got;
2135 return ret; 2185 return ret;
2136} 2186}
2137 2187
@@ -2168,8 +2218,8 @@ static void check_max_size(struct inode *inode, loff_t endoff)
2168 * due to a small max_size, make sure we check_max_size (and possibly 2218 * due to a small max_size, make sure we check_max_size (and possibly
2169 * ask the mds) so we don't get hung up indefinitely. 2219 * ask the mds) so we don't get hung up indefinitely.
2170 */ 2220 */
2171int ceph_get_caps(struct ceph_inode_info *ci, int need, int want, int *got, 2221int ceph_get_caps(struct ceph_inode_info *ci, int need, int want,
2172 loff_t endoff) 2222 loff_t endoff, int *got, struct page **pinned_page)
2173{ 2223{
2174 int check_max, ret, err; 2224 int check_max, ret, err;
2175 2225
@@ -2179,8 +2229,8 @@ retry:
2179 check_max = 0; 2229 check_max = 0;
2180 err = 0; 2230 err = 0;
2181 ret = wait_event_interruptible(ci->i_cap_wq, 2231 ret = wait_event_interruptible(ci->i_cap_wq,
2182 try_get_cap_refs(ci, need, want, 2232 try_get_cap_refs(ci, need, want, endoff,
2183 got, endoff, 2233 got, pinned_page,
2184 &check_max, &err)); 2234 &check_max, &err));
2185 if (err) 2235 if (err)
2186 ret = err; 2236 ret = err;
@@ -2383,6 +2433,8 @@ static void invalidate_aliases(struct inode *inode)
2383static void handle_cap_grant(struct ceph_mds_client *mdsc, 2433static void handle_cap_grant(struct ceph_mds_client *mdsc,
2384 struct inode *inode, struct ceph_mds_caps *grant, 2434 struct inode *inode, struct ceph_mds_caps *grant,
2385 void *snaptrace, int snaptrace_len, 2435 void *snaptrace, int snaptrace_len,
2436 u64 inline_version,
2437 void *inline_data, int inline_len,
2386 struct ceph_buffer *xattr_buf, 2438 struct ceph_buffer *xattr_buf,
2387 struct ceph_mds_session *session, 2439 struct ceph_mds_session *session,
2388 struct ceph_cap *cap, int issued) 2440 struct ceph_cap *cap, int issued)
@@ -2403,6 +2455,7 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc,
2403 bool queue_invalidate = false; 2455 bool queue_invalidate = false;
2404 bool queue_revalidate = false; 2456 bool queue_revalidate = false;
2405 bool deleted_inode = false; 2457 bool deleted_inode = false;
2458 bool fill_inline = false;
2406 2459
2407 dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n", 2460 dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n",
2408 inode, cap, mds, seq, ceph_cap_string(newcaps)); 2461 inode, cap, mds, seq, ceph_cap_string(newcaps));
@@ -2576,6 +2629,13 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc,
2576 } 2629 }
2577 BUG_ON(cap->issued & ~cap->implemented); 2630 BUG_ON(cap->issued & ~cap->implemented);
2578 2631
2632 if (inline_version > 0 && inline_version >= ci->i_inline_version) {
2633 ci->i_inline_version = inline_version;
2634 if (ci->i_inline_version != CEPH_INLINE_NONE &&
2635 (newcaps & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)))
2636 fill_inline = true;
2637 }
2638
2579 spin_unlock(&ci->i_ceph_lock); 2639 spin_unlock(&ci->i_ceph_lock);
2580 2640
2581 if (le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT) { 2641 if (le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT) {
@@ -2589,6 +2649,9 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc,
2589 wake = true; 2649 wake = true;
2590 } 2650 }
2591 2651
2652 if (fill_inline)
2653 ceph_fill_inline_data(inode, NULL, inline_data, inline_len);
2654
2592 if (queue_trunc) { 2655 if (queue_trunc) {
2593 ceph_queue_vmtruncate(inode); 2656 ceph_queue_vmtruncate(inode);
2594 ceph_queue_revalidate(inode); 2657 ceph_queue_revalidate(inode);
@@ -2996,11 +3059,12 @@ void ceph_handle_caps(struct ceph_mds_session *session,
2996 u64 cap_id; 3059 u64 cap_id;
2997 u64 size, max_size; 3060 u64 size, max_size;
2998 u64 tid; 3061 u64 tid;
3062 u64 inline_version = 0;
3063 void *inline_data = NULL;
3064 u32 inline_len = 0;
2999 void *snaptrace; 3065 void *snaptrace;
3000 size_t snaptrace_len; 3066 size_t snaptrace_len;
3001 void *flock; 3067 void *p, *end;
3002 void *end;
3003 u32 flock_len;
3004 3068
3005 dout("handle_caps from mds%d\n", mds); 3069 dout("handle_caps from mds%d\n", mds);
3006 3070
@@ -3021,30 +3085,37 @@ void ceph_handle_caps(struct ceph_mds_session *session,
3021 3085
3022 snaptrace = h + 1; 3086 snaptrace = h + 1;
3023 snaptrace_len = le32_to_cpu(h->snap_trace_len); 3087 snaptrace_len = le32_to_cpu(h->snap_trace_len);
3088 p = snaptrace + snaptrace_len;
3024 3089
3025 if (le16_to_cpu(msg->hdr.version) >= 2) { 3090 if (le16_to_cpu(msg->hdr.version) >= 2) {
3026 void *p = snaptrace + snaptrace_len; 3091 u32 flock_len;
3027 ceph_decode_32_safe(&p, end, flock_len, bad); 3092 ceph_decode_32_safe(&p, end, flock_len, bad);
3028 if (p + flock_len > end) 3093 if (p + flock_len > end)
3029 goto bad; 3094 goto bad;
3030 flock = p; 3095 p += flock_len;
3031 } else {
3032 flock = NULL;
3033 flock_len = 0;
3034 } 3096 }
3035 3097
3036 if (le16_to_cpu(msg->hdr.version) >= 3) { 3098 if (le16_to_cpu(msg->hdr.version) >= 3) {
3037 if (op == CEPH_CAP_OP_IMPORT) { 3099 if (op == CEPH_CAP_OP_IMPORT) {
3038 void *p = flock + flock_len;
3039 if (p + sizeof(*peer) > end) 3100 if (p + sizeof(*peer) > end)
3040 goto bad; 3101 goto bad;
3041 peer = p; 3102 peer = p;
3103 p += sizeof(*peer);
3042 } else if (op == CEPH_CAP_OP_EXPORT) { 3104 } else if (op == CEPH_CAP_OP_EXPORT) {
3043 /* recorded in unused fields */ 3105 /* recorded in unused fields */
3044 peer = (void *)&h->size; 3106 peer = (void *)&h->size;
3045 } 3107 }
3046 } 3108 }
3047 3109
3110 if (le16_to_cpu(msg->hdr.version) >= 4) {
3111 ceph_decode_64_safe(&p, end, inline_version, bad);
3112 ceph_decode_32_safe(&p, end, inline_len, bad);
3113 if (p + inline_len > end)
3114 goto bad;
3115 inline_data = p;
3116 p += inline_len;
3117 }
3118
3048 /* lookup ino */ 3119 /* lookup ino */
3049 inode = ceph_find_inode(sb, vino); 3120 inode = ceph_find_inode(sb, vino);
3050 ci = ceph_inode(inode); 3121 ci = ceph_inode(inode);
@@ -3085,6 +3156,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
3085 handle_cap_import(mdsc, inode, h, peer, session, 3156 handle_cap_import(mdsc, inode, h, peer, session,
3086 &cap, &issued); 3157 &cap, &issued);
3087 handle_cap_grant(mdsc, inode, h, snaptrace, snaptrace_len, 3158 handle_cap_grant(mdsc, inode, h, snaptrace, snaptrace_len,
3159 inline_version, inline_data, inline_len,
3088 msg->middle, session, cap, issued); 3160 msg->middle, session, cap, issued);
3089 goto done_unlocked; 3161 goto done_unlocked;
3090 } 3162 }
@@ -3105,8 +3177,9 @@ void ceph_handle_caps(struct ceph_mds_session *session,
3105 case CEPH_CAP_OP_GRANT: 3177 case CEPH_CAP_OP_GRANT:
3106 __ceph_caps_issued(ci, &issued); 3178 __ceph_caps_issued(ci, &issued);
3107 issued |= __ceph_caps_dirty(ci); 3179 issued |= __ceph_caps_dirty(ci);
3108 handle_cap_grant(mdsc, inode, h, NULL, 0, msg->middle, 3180 handle_cap_grant(mdsc, inode, h, NULL, 0,
3109 session, cap, issued); 3181 inline_version, inline_data, inline_len,
3182 msg->middle, session, cap, issued);
3110 goto done_unlocked; 3183 goto done_unlocked;
3111 3184
3112 case CEPH_CAP_OP_FLUSH_ACK: 3185 case CEPH_CAP_OP_FLUSH_ACK:
@@ -3137,8 +3210,7 @@ flush_cap_releases:
3137done: 3210done:
3138 mutex_unlock(&session->s_mutex); 3211 mutex_unlock(&session->s_mutex);
3139done_unlocked: 3212done_unlocked:
3140 if (inode) 3213 iput(inode);
3141 iput(inode);
3142 return; 3214 return;
3143 3215
3144bad: 3216bad:
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 681a8537b64f..c241603764fd 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -183,7 +183,7 @@ more:
183 spin_unlock(&parent->d_lock); 183 spin_unlock(&parent->d_lock);
184 184
185 /* make sure a dentry wasn't dropped while we didn't have parent lock */ 185 /* make sure a dentry wasn't dropped while we didn't have parent lock */
186 if (!ceph_dir_is_complete(dir)) { 186 if (!ceph_dir_is_complete_ordered(dir)) {
187 dout(" lost dir complete on %p; falling back to mds\n", dir); 187 dout(" lost dir complete on %p; falling back to mds\n", dir);
188 dput(dentry); 188 dput(dentry);
189 err = -EAGAIN; 189 err = -EAGAIN;
@@ -261,10 +261,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
261 261
262 /* always start with . and .. */ 262 /* always start with . and .. */
263 if (ctx->pos == 0) { 263 if (ctx->pos == 0) {
264 /* note dir version at start of readdir so we can tell
265 * if any dentries get dropped */
266 fi->dir_release_count = atomic_read(&ci->i_release_count);
267
268 dout("readdir off 0 -> '.'\n"); 264 dout("readdir off 0 -> '.'\n");
269 if (!dir_emit(ctx, ".", 1, 265 if (!dir_emit(ctx, ".", 1,
270 ceph_translate_ino(inode->i_sb, inode->i_ino), 266 ceph_translate_ino(inode->i_sb, inode->i_ino),
@@ -289,7 +285,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
289 if ((ctx->pos == 2 || fi->dentry) && 285 if ((ctx->pos == 2 || fi->dentry) &&
290 !ceph_test_mount_opt(fsc, NOASYNCREADDIR) && 286 !ceph_test_mount_opt(fsc, NOASYNCREADDIR) &&
291 ceph_snap(inode) != CEPH_SNAPDIR && 287 ceph_snap(inode) != CEPH_SNAPDIR &&
292 __ceph_dir_is_complete(ci) && 288 __ceph_dir_is_complete_ordered(ci) &&
293 __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) { 289 __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) {
294 u32 shared_gen = ci->i_shared_gen; 290 u32 shared_gen = ci->i_shared_gen;
295 spin_unlock(&ci->i_ceph_lock); 291 spin_unlock(&ci->i_ceph_lock);
@@ -312,6 +308,13 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
312 308
313 /* proceed with a normal readdir */ 309 /* proceed with a normal readdir */
314 310
311 if (ctx->pos == 2) {
312 /* note dir version at start of readdir so we can tell
313 * if any dentries get dropped */
314 fi->dir_release_count = atomic_read(&ci->i_release_count);
315 fi->dir_ordered_count = ci->i_ordered_count;
316 }
317
315more: 318more:
316 /* do we have the correct frag content buffered? */ 319 /* do we have the correct frag content buffered? */
317 if (fi->frag != frag || fi->last_readdir == NULL) { 320 if (fi->frag != frag || fi->last_readdir == NULL) {
@@ -446,8 +449,12 @@ more:
446 */ 449 */
447 spin_lock(&ci->i_ceph_lock); 450 spin_lock(&ci->i_ceph_lock);
448 if (atomic_read(&ci->i_release_count) == fi->dir_release_count) { 451 if (atomic_read(&ci->i_release_count) == fi->dir_release_count) {
449 dout(" marking %p complete\n", inode); 452 if (ci->i_ordered_count == fi->dir_ordered_count)
450 __ceph_dir_set_complete(ci, fi->dir_release_count); 453 dout(" marking %p complete and ordered\n", inode);
454 else
455 dout(" marking %p complete\n", inode);
456 __ceph_dir_set_complete(ci, fi->dir_release_count,
457 fi->dir_ordered_count);
451 } 458 }
452 spin_unlock(&ci->i_ceph_lock); 459 spin_unlock(&ci->i_ceph_lock);
453 460
@@ -805,7 +812,9 @@ static int ceph_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
805 acls.pagelist = NULL; 812 acls.pagelist = NULL;
806 } 813 }
807 err = ceph_mdsc_do_request(mdsc, dir, req); 814 err = ceph_mdsc_do_request(mdsc, dir, req);
808 if (!err && !req->r_reply_info.head->is_dentry) 815 if (!err &&
816 !req->r_reply_info.head->is_target &&
817 !req->r_reply_info.head->is_dentry)
809 err = ceph_handle_notrace_create(dir, dentry); 818 err = ceph_handle_notrace_create(dir, dentry);
810 ceph_mdsc_put_request(req); 819 ceph_mdsc_put_request(req);
811out: 820out:
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 9f8e3572040e..ce74b394b49d 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -333,6 +333,11 @@ int ceph_release(struct inode *inode, struct file *file)
333 return 0; 333 return 0;
334} 334}
335 335
336enum {
337 CHECK_EOF = 1,
338 READ_INLINE = 2,
339};
340
336/* 341/*
337 * Read a range of bytes striped over one or more objects. Iterate over 342 * Read a range of bytes striped over one or more objects. Iterate over
338 * objects we stripe over. (That's not atomic, but good enough for now.) 343 * objects we stripe over. (That's not atomic, but good enough for now.)
@@ -412,7 +417,7 @@ more:
412 ret = read; 417 ret = read;
413 /* did we bounce off eof? */ 418 /* did we bounce off eof? */
414 if (pos + left > inode->i_size) 419 if (pos + left > inode->i_size)
415 *checkeof = 1; 420 *checkeof = CHECK_EOF;
416 } 421 }
417 422
418 dout("striped_read returns %d\n", ret); 423 dout("striped_read returns %d\n", ret);
@@ -598,7 +603,7 @@ ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos)
598 snapc = ci->i_snap_realm->cached_context; 603 snapc = ci->i_snap_realm->cached_context;
599 vino = ceph_vino(inode); 604 vino = ceph_vino(inode);
600 req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, 605 req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
601 vino, pos, &len, 606 vino, pos, &len, 0,
602 2,/*include a 'startsync' command*/ 607 2,/*include a 'startsync' command*/
603 CEPH_OSD_OP_WRITE, flags, snapc, 608 CEPH_OSD_OP_WRITE, flags, snapc,
604 ci->i_truncate_seq, 609 ci->i_truncate_seq,
@@ -609,6 +614,8 @@ ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos)
609 break; 614 break;
610 } 615 }
611 616
617 osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC);
618
612 n = iov_iter_get_pages_alloc(from, &pages, len, &start); 619 n = iov_iter_get_pages_alloc(from, &pages, len, &start);
613 if (unlikely(n < 0)) { 620 if (unlikely(n < 0)) {
614 ret = n; 621 ret = n;
@@ -713,7 +720,7 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos)
713 snapc = ci->i_snap_realm->cached_context; 720 snapc = ci->i_snap_realm->cached_context;
714 vino = ceph_vino(inode); 721 vino = ceph_vino(inode);
715 req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, 722 req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
716 vino, pos, &len, 1, 723 vino, pos, &len, 0, 1,
717 CEPH_OSD_OP_WRITE, flags, snapc, 724 CEPH_OSD_OP_WRITE, flags, snapc,
718 ci->i_truncate_seq, 725 ci->i_truncate_seq,
719 ci->i_truncate_size, 726 ci->i_truncate_size,
@@ -803,9 +810,10 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to)
803 size_t len = iocb->ki_nbytes; 810 size_t len = iocb->ki_nbytes;
804 struct inode *inode = file_inode(filp); 811 struct inode *inode = file_inode(filp);
805 struct ceph_inode_info *ci = ceph_inode(inode); 812 struct ceph_inode_info *ci = ceph_inode(inode);
813 struct page *pinned_page = NULL;
806 ssize_t ret; 814 ssize_t ret;
807 int want, got = 0; 815 int want, got = 0;
808 int checkeof = 0, read = 0; 816 int retry_op = 0, read = 0;
809 817
810again: 818again:
811 dout("aio_read %p %llx.%llx %llu~%u trying to get caps on %p\n", 819 dout("aio_read %p %llx.%llx %llu~%u trying to get caps on %p\n",
@@ -815,7 +823,7 @@ again:
815 want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO; 823 want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO;
816 else 824 else
817 want = CEPH_CAP_FILE_CACHE; 825 want = CEPH_CAP_FILE_CACHE;
818 ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, &got, -1); 826 ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, -1, &got, &pinned_page);
819 if (ret < 0) 827 if (ret < 0)
820 return ret; 828 return ret;
821 829
@@ -827,8 +835,12 @@ again:
827 inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len, 835 inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len,
828 ceph_cap_string(got)); 836 ceph_cap_string(got));
829 837
830 /* hmm, this isn't really async... */ 838 if (ci->i_inline_version == CEPH_INLINE_NONE) {
831 ret = ceph_sync_read(iocb, to, &checkeof); 839 /* hmm, this isn't really async... */
840 ret = ceph_sync_read(iocb, to, &retry_op);
841 } else {
842 retry_op = READ_INLINE;
843 }
832 } else { 844 } else {
833 dout("aio_read %p %llx.%llx %llu~%u got cap refs on %s\n", 845 dout("aio_read %p %llx.%llx %llu~%u got cap refs on %s\n",
834 inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len, 846 inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len,
@@ -838,13 +850,55 @@ again:
838 } 850 }
839 dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n", 851 dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n",
840 inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret); 852 inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret);
853 if (pinned_page) {
854 page_cache_release(pinned_page);
855 pinned_page = NULL;
856 }
841 ceph_put_cap_refs(ci, got); 857 ceph_put_cap_refs(ci, got);
858 if (retry_op && ret >= 0) {
859 int statret;
860 struct page *page = NULL;
861 loff_t i_size;
862 if (retry_op == READ_INLINE) {
863 page = __page_cache_alloc(GFP_NOFS);
864 if (!page)
865 return -ENOMEM;
866 }
842 867
843 if (checkeof && ret >= 0) { 868 statret = __ceph_do_getattr(inode, page,
844 int statret = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE, false); 869 CEPH_STAT_CAP_INLINE_DATA, !!page);
870 if (statret < 0) {
871 __free_page(page);
872 if (statret == -ENODATA) {
873 BUG_ON(retry_op != READ_INLINE);
874 goto again;
875 }
876 return statret;
877 }
878
879 i_size = i_size_read(inode);
880 if (retry_op == READ_INLINE) {
881 /* does not support inline data > PAGE_SIZE */
882 if (i_size > PAGE_CACHE_SIZE) {
883 ret = -EIO;
884 } else if (iocb->ki_pos < i_size) {
885 loff_t end = min_t(loff_t, i_size,
886 iocb->ki_pos + len);
887 if (statret < end)
888 zero_user_segment(page, statret, end);
889 ret = copy_page_to_iter(page,
890 iocb->ki_pos & ~PAGE_MASK,
891 end - iocb->ki_pos, to);
892 iocb->ki_pos += ret;
893 } else {
894 ret = 0;
895 }
896 __free_pages(page, 0);
897 return ret;
898 }
845 899
846 /* hit EOF or hole? */ 900 /* hit EOF or hole? */
847 if (statret == 0 && iocb->ki_pos < inode->i_size && 901 if (retry_op == CHECK_EOF && iocb->ki_pos < i_size &&
848 ret < len) { 902 ret < len) {
849 dout("sync_read hit hole, ppos %lld < size %lld" 903 dout("sync_read hit hole, ppos %lld < size %lld"
850 ", reading more\n", iocb->ki_pos, 904 ", reading more\n", iocb->ki_pos,
@@ -852,7 +906,7 @@ again:
852 906
853 read += ret; 907 read += ret;
854 len -= ret; 908 len -= ret;
855 checkeof = 0; 909 retry_op = 0;
856 goto again; 910 goto again;
857 } 911 }
858 } 912 }
@@ -909,6 +963,12 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
909 if (err) 963 if (err)
910 goto out; 964 goto out;
911 965
966 if (ci->i_inline_version != CEPH_INLINE_NONE) {
967 err = ceph_uninline_data(file, NULL);
968 if (err < 0)
969 goto out;
970 }
971
912retry_snap: 972retry_snap:
913 if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL)) { 973 if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL)) {
914 err = -ENOSPC; 974 err = -ENOSPC;
@@ -922,7 +982,8 @@ retry_snap:
922 else 982 else
923 want = CEPH_CAP_FILE_BUFFER; 983 want = CEPH_CAP_FILE_BUFFER;
924 got = 0; 984 got = 0;
925 err = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, pos + count); 985 err = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, pos + count,
986 &got, NULL);
926 if (err < 0) 987 if (err < 0)
927 goto out; 988 goto out;
928 989
@@ -969,6 +1030,7 @@ retry_snap:
969 if (written >= 0) { 1030 if (written >= 0) {
970 int dirty; 1031 int dirty;
971 spin_lock(&ci->i_ceph_lock); 1032 spin_lock(&ci->i_ceph_lock);
1033 ci->i_inline_version = CEPH_INLINE_NONE;
972 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR); 1034 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
973 spin_unlock(&ci->i_ceph_lock); 1035 spin_unlock(&ci->i_ceph_lock);
974 if (dirty) 1036 if (dirty)
@@ -1111,7 +1173,7 @@ static int ceph_zero_partial_object(struct inode *inode,
1111 req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, 1173 req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
1112 ceph_vino(inode), 1174 ceph_vino(inode),
1113 offset, length, 1175 offset, length,
1114 1, op, 1176 0, 1, op,
1115 CEPH_OSD_FLAG_WRITE | 1177 CEPH_OSD_FLAG_WRITE |
1116 CEPH_OSD_FLAG_ONDISK, 1178 CEPH_OSD_FLAG_ONDISK,
1117 NULL, 0, 0, false); 1179 NULL, 0, 0, false);
@@ -1214,6 +1276,12 @@ static long ceph_fallocate(struct file *file, int mode,
1214 goto unlock; 1276 goto unlock;
1215 } 1277 }
1216 1278
1279 if (ci->i_inline_version != CEPH_INLINE_NONE) {
1280 ret = ceph_uninline_data(file, NULL);
1281 if (ret < 0)
1282 goto unlock;
1283 }
1284
1217 size = i_size_read(inode); 1285 size = i_size_read(inode);
1218 if (!(mode & FALLOC_FL_KEEP_SIZE)) 1286 if (!(mode & FALLOC_FL_KEEP_SIZE))
1219 endoff = offset + length; 1287 endoff = offset + length;
@@ -1223,7 +1291,7 @@ static long ceph_fallocate(struct file *file, int mode,
1223 else 1291 else
1224 want = CEPH_CAP_FILE_BUFFER; 1292 want = CEPH_CAP_FILE_BUFFER;
1225 1293
1226 ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, endoff); 1294 ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, endoff, &got, NULL);
1227 if (ret < 0) 1295 if (ret < 0)
1228 goto unlock; 1296 goto unlock;
1229 1297
@@ -1240,6 +1308,7 @@ static long ceph_fallocate(struct file *file, int mode,
1240 1308
1241 if (!ret) { 1309 if (!ret) {
1242 spin_lock(&ci->i_ceph_lock); 1310 spin_lock(&ci->i_ceph_lock);
1311 ci->i_inline_version = CEPH_INLINE_NONE;
1243 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR); 1312 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
1244 spin_unlock(&ci->i_ceph_lock); 1313 spin_unlock(&ci->i_ceph_lock);
1245 if (dirty) 1314 if (dirty)
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index a5593d51d035..f61a74115beb 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -387,8 +387,10 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
387 spin_lock_init(&ci->i_ceph_lock); 387 spin_lock_init(&ci->i_ceph_lock);
388 388
389 ci->i_version = 0; 389 ci->i_version = 0;
390 ci->i_inline_version = 0;
390 ci->i_time_warp_seq = 0; 391 ci->i_time_warp_seq = 0;
391 ci->i_ceph_flags = 0; 392 ci->i_ceph_flags = 0;
393 ci->i_ordered_count = 0;
392 atomic_set(&ci->i_release_count, 1); 394 atomic_set(&ci->i_release_count, 1);
393 atomic_set(&ci->i_complete_count, 0); 395 atomic_set(&ci->i_complete_count, 0);
394 ci->i_symlink = NULL; 396 ci->i_symlink = NULL;
@@ -657,7 +659,7 @@ void ceph_fill_file_time(struct inode *inode, int issued,
657 * Populate an inode based on info from mds. May be called on new or 659 * Populate an inode based on info from mds. May be called on new or
658 * existing inodes. 660 * existing inodes.
659 */ 661 */
660static int fill_inode(struct inode *inode, 662static int fill_inode(struct inode *inode, struct page *locked_page,
661 struct ceph_mds_reply_info_in *iinfo, 663 struct ceph_mds_reply_info_in *iinfo,
662 struct ceph_mds_reply_dirfrag *dirinfo, 664 struct ceph_mds_reply_dirfrag *dirinfo,
663 struct ceph_mds_session *session, 665 struct ceph_mds_session *session,
@@ -675,6 +677,7 @@ static int fill_inode(struct inode *inode,
675 bool wake = false; 677 bool wake = false;
676 bool queue_trunc = false; 678 bool queue_trunc = false;
677 bool new_version = false; 679 bool new_version = false;
680 bool fill_inline = false;
678 681
679 dout("fill_inode %p ino %llx.%llx v %llu had %llu\n", 682 dout("fill_inode %p ino %llx.%llx v %llu had %llu\n",
680 inode, ceph_vinop(inode), le64_to_cpu(info->version), 683 inode, ceph_vinop(inode), le64_to_cpu(info->version),
@@ -845,7 +848,8 @@ static int fill_inode(struct inode *inode,
845 (issued & CEPH_CAP_FILE_EXCL) == 0 && 848 (issued & CEPH_CAP_FILE_EXCL) == 0 &&
846 !__ceph_dir_is_complete(ci)) { 849 !__ceph_dir_is_complete(ci)) {
847 dout(" marking %p complete (empty)\n", inode); 850 dout(" marking %p complete (empty)\n", inode);
848 __ceph_dir_set_complete(ci, atomic_read(&ci->i_release_count)); 851 __ceph_dir_set_complete(ci, atomic_read(&ci->i_release_count),
852 ci->i_ordered_count);
849 } 853 }
850 854
851 /* were we issued a capability? */ 855 /* were we issued a capability? */
@@ -873,8 +877,23 @@ static int fill_inode(struct inode *inode,
873 ceph_vinop(inode)); 877 ceph_vinop(inode));
874 __ceph_get_fmode(ci, cap_fmode); 878 __ceph_get_fmode(ci, cap_fmode);
875 } 879 }
880
881 if (iinfo->inline_version > 0 &&
882 iinfo->inline_version >= ci->i_inline_version) {
883 int cache_caps = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO;
884 ci->i_inline_version = iinfo->inline_version;
885 if (ci->i_inline_version != CEPH_INLINE_NONE &&
886 (locked_page ||
887 (le32_to_cpu(info->cap.caps) & cache_caps)))
888 fill_inline = true;
889 }
890
876 spin_unlock(&ci->i_ceph_lock); 891 spin_unlock(&ci->i_ceph_lock);
877 892
893 if (fill_inline)
894 ceph_fill_inline_data(inode, locked_page,
895 iinfo->inline_data, iinfo->inline_len);
896
878 if (wake) 897 if (wake)
879 wake_up_all(&ci->i_cap_wq); 898 wake_up_all(&ci->i_cap_wq);
880 899
@@ -1062,7 +1081,8 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
1062 struct inode *dir = req->r_locked_dir; 1081 struct inode *dir = req->r_locked_dir;
1063 1082
1064 if (dir) { 1083 if (dir) {
1065 err = fill_inode(dir, &rinfo->diri, rinfo->dirfrag, 1084 err = fill_inode(dir, NULL,
1085 &rinfo->diri, rinfo->dirfrag,
1066 session, req->r_request_started, -1, 1086 session, req->r_request_started, -1,
1067 &req->r_caps_reservation); 1087 &req->r_caps_reservation);
1068 if (err < 0) 1088 if (err < 0)
@@ -1132,7 +1152,7 @@ retry_lookup:
1132 } 1152 }
1133 req->r_target_inode = in; 1153 req->r_target_inode = in;
1134 1154
1135 err = fill_inode(in, &rinfo->targeti, NULL, 1155 err = fill_inode(in, req->r_locked_page, &rinfo->targeti, NULL,
1136 session, req->r_request_started, 1156 session, req->r_request_started,
1137 (!req->r_aborted && rinfo->head->result == 0) ? 1157 (!req->r_aborted && rinfo->head->result == 0) ?
1138 req->r_fmode : -1, 1158 req->r_fmode : -1,
@@ -1204,8 +1224,8 @@ retry_lookup:
1204 ceph_invalidate_dentry_lease(dn); 1224 ceph_invalidate_dentry_lease(dn);
1205 1225
1206 /* d_move screws up sibling dentries' offsets */ 1226 /* d_move screws up sibling dentries' offsets */
1207 ceph_dir_clear_complete(dir); 1227 ceph_dir_clear_ordered(dir);
1208 ceph_dir_clear_complete(olddir); 1228 ceph_dir_clear_ordered(olddir);
1209 1229
1210 dout("dn %p gets new offset %lld\n", req->r_old_dentry, 1230 dout("dn %p gets new offset %lld\n", req->r_old_dentry,
1211 ceph_dentry(req->r_old_dentry)->offset); 1231 ceph_dentry(req->r_old_dentry)->offset);
@@ -1217,6 +1237,7 @@ retry_lookup:
1217 if (!rinfo->head->is_target) { 1237 if (!rinfo->head->is_target) {
1218 dout("fill_trace null dentry\n"); 1238 dout("fill_trace null dentry\n");
1219 if (dn->d_inode) { 1239 if (dn->d_inode) {
1240 ceph_dir_clear_ordered(dir);
1220 dout("d_delete %p\n", dn); 1241 dout("d_delete %p\n", dn);
1221 d_delete(dn); 1242 d_delete(dn);
1222 } else { 1243 } else {
@@ -1233,7 +1254,7 @@ retry_lookup:
1233 1254
1234 /* attach proper inode */ 1255 /* attach proper inode */
1235 if (!dn->d_inode) { 1256 if (!dn->d_inode) {
1236 ceph_dir_clear_complete(dir); 1257 ceph_dir_clear_ordered(dir);
1237 ihold(in); 1258 ihold(in);
1238 dn = splice_dentry(dn, in, &have_lease); 1259 dn = splice_dentry(dn, in, &have_lease);
1239 if (IS_ERR(dn)) { 1260 if (IS_ERR(dn)) {
@@ -1263,7 +1284,7 @@ retry_lookup:
1263 BUG_ON(!dir); 1284 BUG_ON(!dir);
1264 BUG_ON(ceph_snap(dir) != CEPH_SNAPDIR); 1285 BUG_ON(ceph_snap(dir) != CEPH_SNAPDIR);
1265 dout(" linking snapped dir %p to dn %p\n", in, dn); 1286 dout(" linking snapped dir %p to dn %p\n", in, dn);
1266 ceph_dir_clear_complete(dir); 1287 ceph_dir_clear_ordered(dir);
1267 ihold(in); 1288 ihold(in);
1268 dn = splice_dentry(dn, in, NULL); 1289 dn = splice_dentry(dn, in, NULL);
1269 if (IS_ERR(dn)) { 1290 if (IS_ERR(dn)) {
@@ -1300,7 +1321,7 @@ static int readdir_prepopulate_inodes_only(struct ceph_mds_request *req,
1300 dout("new_inode badness got %d\n", err); 1321 dout("new_inode badness got %d\n", err);
1301 continue; 1322 continue;
1302 } 1323 }
1303 rc = fill_inode(in, &rinfo->dir_in[i], NULL, session, 1324 rc = fill_inode(in, NULL, &rinfo->dir_in[i], NULL, session,
1304 req->r_request_started, -1, 1325 req->r_request_started, -1,
1305 &req->r_caps_reservation); 1326 &req->r_caps_reservation);
1306 if (rc < 0) { 1327 if (rc < 0) {
@@ -1416,7 +1437,7 @@ retry_lookup:
1416 } 1437 }
1417 } 1438 }
1418 1439
1419 if (fill_inode(in, &rinfo->dir_in[i], NULL, session, 1440 if (fill_inode(in, NULL, &rinfo->dir_in[i], NULL, session,
1420 req->r_request_started, -1, 1441 req->r_request_started, -1,
1421 &req->r_caps_reservation) < 0) { 1442 &req->r_caps_reservation) < 0) {
1422 pr_err("fill_inode badness on %p\n", in); 1443 pr_err("fill_inode badness on %p\n", in);
@@ -1899,7 +1920,8 @@ out_put:
1899 * Verify that we have a lease on the given mask. If not, 1920 * Verify that we have a lease on the given mask. If not,
1900 * do a getattr against an mds. 1921 * do a getattr against an mds.
1901 */ 1922 */
1902int ceph_do_getattr(struct inode *inode, int mask, bool force) 1923int __ceph_do_getattr(struct inode *inode, struct page *locked_page,
1924 int mask, bool force)
1903{ 1925{
1904 struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb); 1926 struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb);
1905 struct ceph_mds_client *mdsc = fsc->mdsc; 1927 struct ceph_mds_client *mdsc = fsc->mdsc;
@@ -1911,7 +1933,8 @@ int ceph_do_getattr(struct inode *inode, int mask, bool force)
1911 return 0; 1933 return 0;
1912 } 1934 }
1913 1935
1914 dout("do_getattr inode %p mask %s mode 0%o\n", inode, ceph_cap_string(mask), inode->i_mode); 1936 dout("do_getattr inode %p mask %s mode 0%o\n",
1937 inode, ceph_cap_string(mask), inode->i_mode);
1915 if (!force && ceph_caps_issued_mask(ceph_inode(inode), mask, 1)) 1938 if (!force && ceph_caps_issued_mask(ceph_inode(inode), mask, 1))
1916 return 0; 1939 return 0;
1917 1940
@@ -1922,7 +1945,19 @@ int ceph_do_getattr(struct inode *inode, int mask, bool force)
1922 ihold(inode); 1945 ihold(inode);
1923 req->r_num_caps = 1; 1946 req->r_num_caps = 1;
1924 req->r_args.getattr.mask = cpu_to_le32(mask); 1947 req->r_args.getattr.mask = cpu_to_le32(mask);
1948 req->r_locked_page = locked_page;
1925 err = ceph_mdsc_do_request(mdsc, NULL, req); 1949 err = ceph_mdsc_do_request(mdsc, NULL, req);
1950 if (locked_page && err == 0) {
1951 u64 inline_version = req->r_reply_info.targeti.inline_version;
1952 if (inline_version == 0) {
1953 /* the reply is supposed to contain inline data */
1954 err = -EINVAL;
1955 } else if (inline_version == CEPH_INLINE_NONE) {
1956 err = -ENODATA;
1957 } else {
1958 err = req->r_reply_info.targeti.inline_len;
1959 }
1960 }
1926 ceph_mdsc_put_request(req); 1961 ceph_mdsc_put_request(req);
1927 dout("do_getattr result=%d\n", err); 1962 dout("do_getattr result=%d\n", err);
1928 return err; 1963 return err;
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index fbc39c47bacd..c35c5c614e38 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -9,6 +9,8 @@
9#include <linux/ceph/pagelist.h> 9#include <linux/ceph/pagelist.h>
10 10
11static u64 lock_secret; 11static u64 lock_secret;
12static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc,
13 struct ceph_mds_request *req);
12 14
13static inline u64 secure_addr(void *addr) 15static inline u64 secure_addr(void *addr)
14{ 16{
@@ -40,6 +42,9 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
40 u64 length = 0; 42 u64 length = 0;
41 u64 owner; 43 u64 owner;
42 44
45 if (operation != CEPH_MDS_OP_SETFILELOCK || cmd == CEPH_LOCK_UNLOCK)
46 wait = 0;
47
43 req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS); 48 req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS);
44 if (IS_ERR(req)) 49 if (IS_ERR(req))
45 return PTR_ERR(req); 50 return PTR_ERR(req);
@@ -68,6 +73,9 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
68 req->r_args.filelock_change.length = cpu_to_le64(length); 73 req->r_args.filelock_change.length = cpu_to_le64(length);
69 req->r_args.filelock_change.wait = wait; 74 req->r_args.filelock_change.wait = wait;
70 75
76 if (wait)
77 req->r_wait_for_completion = ceph_lock_wait_for_completion;
78
71 err = ceph_mdsc_do_request(mdsc, inode, req); 79 err = ceph_mdsc_do_request(mdsc, inode, req);
72 80
73 if (operation == CEPH_MDS_OP_GETFILELOCK) { 81 if (operation == CEPH_MDS_OP_GETFILELOCK) {
@@ -96,6 +104,52 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
96 return err; 104 return err;
97} 105}
98 106
107static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc,
108 struct ceph_mds_request *req)
109{
110 struct ceph_mds_request *intr_req;
111 struct inode *inode = req->r_inode;
112 int err, lock_type;
113
114 BUG_ON(req->r_op != CEPH_MDS_OP_SETFILELOCK);
115 if (req->r_args.filelock_change.rule == CEPH_LOCK_FCNTL)
116 lock_type = CEPH_LOCK_FCNTL_INTR;
117 else if (req->r_args.filelock_change.rule == CEPH_LOCK_FLOCK)
118 lock_type = CEPH_LOCK_FLOCK_INTR;
119 else
120 BUG_ON(1);
121 BUG_ON(req->r_args.filelock_change.type == CEPH_LOCK_UNLOCK);
122
123 err = wait_for_completion_interruptible(&req->r_completion);
124 if (!err)
125 return 0;
126
127 dout("ceph_lock_wait_for_completion: request %llu was interrupted\n",
128 req->r_tid);
129
130 intr_req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETFILELOCK,
131 USE_AUTH_MDS);
132 if (IS_ERR(intr_req))
133 return PTR_ERR(intr_req);
134
135 intr_req->r_inode = inode;
136 ihold(inode);
137 intr_req->r_num_caps = 1;
138
139 intr_req->r_args.filelock_change = req->r_args.filelock_change;
140 intr_req->r_args.filelock_change.rule = lock_type;
141 intr_req->r_args.filelock_change.type = CEPH_LOCK_UNLOCK;
142
143 err = ceph_mdsc_do_request(mdsc, inode, intr_req);
144 ceph_mdsc_put_request(intr_req);
145
146 if (err && err != -ERESTARTSYS)
147 return err;
148
149 wait_for_completion(&req->r_completion);
150 return 0;
151}
152
99/** 153/**
100 * Attempt to set an fcntl lock. 154 * Attempt to set an fcntl lock.
101 * For now, this just goes away to the server. Later it may be more awesome. 155 * For now, this just goes away to the server. Later it may be more awesome.
@@ -143,11 +197,6 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
143 err); 197 err);
144 } 198 }
145 } 199 }
146
147 } else if (err == -ERESTARTSYS) {
148 dout("undoing lock\n");
149 ceph_lock_message(CEPH_LOCK_FCNTL, op, file,
150 CEPH_LOCK_UNLOCK, 0, fl);
151 } 200 }
152 return err; 201 return err;
153} 202}
@@ -186,11 +235,6 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
186 file, CEPH_LOCK_UNLOCK, 0, fl); 235 file, CEPH_LOCK_UNLOCK, 0, fl);
187 dout("got %d on flock_lock_file_wait, undid lock", err); 236 dout("got %d on flock_lock_file_wait, undid lock", err);
188 } 237 }
189 } else if (err == -ERESTARTSYS) {
190 dout("undoing lock\n");
191 ceph_lock_message(CEPH_LOCK_FLOCK,
192 CEPH_MDS_OP_SETFILELOCK,
193 file, CEPH_LOCK_UNLOCK, 0, fl);
194 } 238 }
195 return err; 239 return err;
196} 240}
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index a92d3f5c6c12..d2171f4a6980 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -89,6 +89,16 @@ static int parse_reply_info_in(void **p, void *end,
89 ceph_decode_need(p, end, info->xattr_len, bad); 89 ceph_decode_need(p, end, info->xattr_len, bad);
90 info->xattr_data = *p; 90 info->xattr_data = *p;
91 *p += info->xattr_len; 91 *p += info->xattr_len;
92
93 if (features & CEPH_FEATURE_MDS_INLINE_DATA) {
94 ceph_decode_64_safe(p, end, info->inline_version, bad);
95 ceph_decode_32_safe(p, end, info->inline_len, bad);
96 ceph_decode_need(p, end, info->inline_len, bad);
97 info->inline_data = *p;
98 *p += info->inline_len;
99 } else
100 info->inline_version = CEPH_INLINE_NONE;
101
92 return 0; 102 return 0;
93bad: 103bad:
94 return err; 104 return err;
@@ -524,8 +534,7 @@ void ceph_mdsc_release_request(struct kref *kref)
524 } 534 }
525 if (req->r_locked_dir) 535 if (req->r_locked_dir)
526 ceph_put_cap_refs(ceph_inode(req->r_locked_dir), CEPH_CAP_PIN); 536 ceph_put_cap_refs(ceph_inode(req->r_locked_dir), CEPH_CAP_PIN);
527 if (req->r_target_inode) 537 iput(req->r_target_inode);
528 iput(req->r_target_inode);
529 if (req->r_dentry) 538 if (req->r_dentry)
530 dput(req->r_dentry); 539 dput(req->r_dentry);
531 if (req->r_old_dentry) 540 if (req->r_old_dentry)
@@ -861,8 +870,11 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
861 /* 870 /*
862 * Serialize client metadata into waiting buffer space, using 871 * Serialize client metadata into waiting buffer space, using
863 * the format that userspace expects for map<string, string> 872 * the format that userspace expects for map<string, string>
873 *
874 * ClientSession messages with metadata are v2
864 */ 875 */
865 msg->hdr.version = 2; /* ClientSession messages with metadata are v2 */ 876 msg->hdr.version = cpu_to_le16(2);
877 msg->hdr.compat_version = cpu_to_le16(1);
866 878
867 /* The write pointer, following the session_head structure */ 879 /* The write pointer, following the session_head structure */
868 p = msg->front.iov_base + sizeof(*h); 880 p = msg->front.iov_base + sizeof(*h);
@@ -1066,8 +1078,7 @@ out:
1066 session->s_cap_iterator = NULL; 1078 session->s_cap_iterator = NULL;
1067 spin_unlock(&session->s_cap_lock); 1079 spin_unlock(&session->s_cap_lock);
1068 1080
1069 if (last_inode) 1081 iput(last_inode);
1070 iput(last_inode);
1071 if (old_cap) 1082 if (old_cap)
1072 ceph_put_cap(session->s_mdsc, old_cap); 1083 ceph_put_cap(session->s_mdsc, old_cap);
1073 1084
@@ -1874,7 +1885,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
1874 goto out_free2; 1885 goto out_free2;
1875 } 1886 }
1876 1887
1877 msg->hdr.version = 2; 1888 msg->hdr.version = cpu_to_le16(2);
1878 msg->hdr.tid = cpu_to_le64(req->r_tid); 1889 msg->hdr.tid = cpu_to_le64(req->r_tid);
1879 1890
1880 head = msg->front.iov_base; 1891 head = msg->front.iov_base;
@@ -2208,6 +2219,8 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc,
2208 &req->r_completion, req->r_timeout); 2219 &req->r_completion, req->r_timeout);
2209 if (err == 0) 2220 if (err == 0)
2210 err = -EIO; 2221 err = -EIO;
2222 } else if (req->r_wait_for_completion) {
2223 err = req->r_wait_for_completion(mdsc, req);
2211 } else { 2224 } else {
2212 err = wait_for_completion_killable(&req->r_completion); 2225 err = wait_for_completion_killable(&req->r_completion);
2213 } 2226 }
@@ -3744,6 +3757,20 @@ static struct ceph_msg *mds_alloc_msg(struct ceph_connection *con,
3744 return msg; 3757 return msg;
3745} 3758}
3746 3759
3760static int sign_message(struct ceph_connection *con, struct ceph_msg *msg)
3761{
3762 struct ceph_mds_session *s = con->private;
3763 struct ceph_auth_handshake *auth = &s->s_auth;
3764 return ceph_auth_sign_message(auth, msg);
3765}
3766
3767static int check_message_signature(struct ceph_connection *con, struct ceph_msg *msg)
3768{
3769 struct ceph_mds_session *s = con->private;
3770 struct ceph_auth_handshake *auth = &s->s_auth;
3771 return ceph_auth_check_message_signature(auth, msg);
3772}
3773
3747static const struct ceph_connection_operations mds_con_ops = { 3774static const struct ceph_connection_operations mds_con_ops = {
3748 .get = con_get, 3775 .get = con_get,
3749 .put = con_put, 3776 .put = con_put,
@@ -3753,6 +3780,8 @@ static const struct ceph_connection_operations mds_con_ops = {
3753 .invalidate_authorizer = invalidate_authorizer, 3780 .invalidate_authorizer = invalidate_authorizer,
3754 .peer_reset = peer_reset, 3781 .peer_reset = peer_reset,
3755 .alloc_msg = mds_alloc_msg, 3782 .alloc_msg = mds_alloc_msg,
3783 .sign_message = sign_message,
3784 .check_message_signature = check_message_signature,
3756}; 3785};
3757 3786
3758/* eof */ 3787/* eof */
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 3288359353e9..e2817d00f7d9 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -41,6 +41,9 @@ struct ceph_mds_reply_info_in {
41 char *symlink; 41 char *symlink;
42 u32 xattr_len; 42 u32 xattr_len;
43 char *xattr_data; 43 char *xattr_data;
44 u64 inline_version;
45 u32 inline_len;
46 char *inline_data;
44}; 47};
45 48
46/* 49/*
@@ -166,6 +169,11 @@ struct ceph_mds_client;
166 */ 169 */
167typedef void (*ceph_mds_request_callback_t) (struct ceph_mds_client *mdsc, 170typedef void (*ceph_mds_request_callback_t) (struct ceph_mds_client *mdsc,
168 struct ceph_mds_request *req); 171 struct ceph_mds_request *req);
172/*
173 * wait for request completion callback
174 */
175typedef int (*ceph_mds_request_wait_callback_t) (struct ceph_mds_client *mdsc,
176 struct ceph_mds_request *req);
169 177
170/* 178/*
171 * an in-flight mds request 179 * an in-flight mds request
@@ -215,6 +223,7 @@ struct ceph_mds_request {
215 int r_request_release_offset; 223 int r_request_release_offset;
216 struct ceph_msg *r_reply; 224 struct ceph_msg *r_reply;
217 struct ceph_mds_reply_info_parsed r_reply_info; 225 struct ceph_mds_reply_info_parsed r_reply_info;
226 struct page *r_locked_page;
218 int r_err; 227 int r_err;
219 bool r_aborted; 228 bool r_aborted;
220 229
@@ -239,6 +248,7 @@ struct ceph_mds_request {
239 struct completion r_completion; 248 struct completion r_completion;
240 struct completion r_safe_completion; 249 struct completion r_safe_completion;
241 ceph_mds_request_callback_t r_callback; 250 ceph_mds_request_callback_t r_callback;
251 ceph_mds_request_wait_callback_t r_wait_for_completion;
242 struct list_head r_unsafe_item; /* per-session unsafe list item */ 252 struct list_head r_unsafe_item; /* per-session unsafe list item */
243 bool r_got_unsafe, r_got_safe, r_got_result; 253 bool r_got_unsafe, r_got_safe, r_got_result;
244 254
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index f01645a27752..ce35fbd4ba5d 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -288,6 +288,9 @@ static int cmpu64_rev(const void *a, const void *b)
288 return 0; 288 return 0;
289} 289}
290 290
291
292static struct ceph_snap_context *empty_snapc;
293
291/* 294/*
292 * build the snap context for a given realm. 295 * build the snap context for a given realm.
293 */ 296 */
@@ -328,6 +331,12 @@ static int build_snap_context(struct ceph_snap_realm *realm)
328 return 0; 331 return 0;
329 } 332 }
330 333
334 if (num == 0 && realm->seq == empty_snapc->seq) {
335 ceph_get_snap_context(empty_snapc);
336 snapc = empty_snapc;
337 goto done;
338 }
339
331 /* alloc new snap context */ 340 /* alloc new snap context */
332 err = -ENOMEM; 341 err = -ENOMEM;
333 if (num > (SIZE_MAX - sizeof(*snapc)) / sizeof(u64)) 342 if (num > (SIZE_MAX - sizeof(*snapc)) / sizeof(u64))
@@ -365,8 +374,8 @@ static int build_snap_context(struct ceph_snap_realm *realm)
365 realm->ino, realm, snapc, snapc->seq, 374 realm->ino, realm, snapc, snapc->seq,
366 (unsigned int) snapc->num_snaps); 375 (unsigned int) snapc->num_snaps);
367 376
368 if (realm->cached_context) 377done:
369 ceph_put_snap_context(realm->cached_context); 378 ceph_put_snap_context(realm->cached_context);
370 realm->cached_context = snapc; 379 realm->cached_context = snapc;
371 return 0; 380 return 0;
372 381
@@ -466,6 +475,9 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
466 cap_snap. lucky us. */ 475 cap_snap. lucky us. */
467 dout("queue_cap_snap %p already pending\n", inode); 476 dout("queue_cap_snap %p already pending\n", inode);
468 kfree(capsnap); 477 kfree(capsnap);
478 } else if (ci->i_snap_realm->cached_context == empty_snapc) {
479 dout("queue_cap_snap %p empty snapc\n", inode);
480 kfree(capsnap);
469 } else if (dirty & (CEPH_CAP_AUTH_EXCL|CEPH_CAP_XATTR_EXCL| 481 } else if (dirty & (CEPH_CAP_AUTH_EXCL|CEPH_CAP_XATTR_EXCL|
470 CEPH_CAP_FILE_EXCL|CEPH_CAP_FILE_WR)) { 482 CEPH_CAP_FILE_EXCL|CEPH_CAP_FILE_WR)) {
471 struct ceph_snap_context *snapc = ci->i_head_snapc; 483 struct ceph_snap_context *snapc = ci->i_head_snapc;
@@ -504,6 +516,8 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
504 capsnap->xattr_version = 0; 516 capsnap->xattr_version = 0;
505 } 517 }
506 518
519 capsnap->inline_data = ci->i_inline_version != CEPH_INLINE_NONE;
520
507 /* dirty page count moved from _head to this cap_snap; 521 /* dirty page count moved from _head to this cap_snap;
508 all subsequent writes page dirties occur _after_ this 522 all subsequent writes page dirties occur _after_ this
509 snapshot. */ 523 snapshot. */
@@ -590,15 +604,13 @@ static void queue_realm_cap_snaps(struct ceph_snap_realm *realm)
590 if (!inode) 604 if (!inode)
591 continue; 605 continue;
592 spin_unlock(&realm->inodes_with_caps_lock); 606 spin_unlock(&realm->inodes_with_caps_lock);
593 if (lastinode) 607 iput(lastinode);
594 iput(lastinode);
595 lastinode = inode; 608 lastinode = inode;
596 ceph_queue_cap_snap(ci); 609 ceph_queue_cap_snap(ci);
597 spin_lock(&realm->inodes_with_caps_lock); 610 spin_lock(&realm->inodes_with_caps_lock);
598 } 611 }
599 spin_unlock(&realm->inodes_with_caps_lock); 612 spin_unlock(&realm->inodes_with_caps_lock);
600 if (lastinode) 613 iput(lastinode);
601 iput(lastinode);
602 614
603 list_for_each_entry(child, &realm->children, child_item) { 615 list_for_each_entry(child, &realm->children, child_item) {
604 dout("queue_realm_cap_snaps %p %llx queue child %p %llx\n", 616 dout("queue_realm_cap_snaps %p %llx queue child %p %llx\n",
@@ -928,5 +940,16 @@ out:
928 return; 940 return;
929} 941}
930 942
943int __init ceph_snap_init(void)
944{
945 empty_snapc = ceph_create_snap_context(0, GFP_NOFS);
946 if (!empty_snapc)
947 return -ENOMEM;
948 empty_snapc->seq = 1;
949 return 0;
950}
931 951
932 952void ceph_snap_exit(void)
953{
954 ceph_put_snap_context(empty_snapc);
955}
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index f6e12377335c..50f06cddc94b 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -515,7 +515,8 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
515 struct ceph_fs_client *fsc; 515 struct ceph_fs_client *fsc;
516 const u64 supported_features = 516 const u64 supported_features =
517 CEPH_FEATURE_FLOCK | 517 CEPH_FEATURE_FLOCK |
518 CEPH_FEATURE_DIRLAYOUTHASH; 518 CEPH_FEATURE_DIRLAYOUTHASH |
519 CEPH_FEATURE_MDS_INLINE_DATA;
519 const u64 required_features = 0; 520 const u64 required_features = 0;
520 int page_count; 521 int page_count;
521 size_t size; 522 size_t size;
@@ -1017,9 +1018,6 @@ static struct file_system_type ceph_fs_type = {
1017}; 1018};
1018MODULE_ALIAS_FS("ceph"); 1019MODULE_ALIAS_FS("ceph");
1019 1020
1020#define _STRINGIFY(x) #x
1021#define STRINGIFY(x) _STRINGIFY(x)
1022
1023static int __init init_ceph(void) 1021static int __init init_ceph(void)
1024{ 1022{
1025 int ret = init_caches(); 1023 int ret = init_caches();
@@ -1028,15 +1026,20 @@ static int __init init_ceph(void)
1028 1026
1029 ceph_flock_init(); 1027 ceph_flock_init();
1030 ceph_xattr_init(); 1028 ceph_xattr_init();
1029 ret = ceph_snap_init();
1030 if (ret)
1031 goto out_xattr;
1031 ret = register_filesystem(&ceph_fs_type); 1032 ret = register_filesystem(&ceph_fs_type);
1032 if (ret) 1033 if (ret)
1033 goto out_icache; 1034 goto out_snap;
1034 1035
1035 pr_info("loaded (mds proto %d)\n", CEPH_MDSC_PROTOCOL); 1036 pr_info("loaded (mds proto %d)\n", CEPH_MDSC_PROTOCOL);
1036 1037
1037 return 0; 1038 return 0;
1038 1039
1039out_icache: 1040out_snap:
1041 ceph_snap_exit();
1042out_xattr:
1040 ceph_xattr_exit(); 1043 ceph_xattr_exit();
1041 destroy_caches(); 1044 destroy_caches();
1042out: 1045out:
@@ -1047,6 +1050,7 @@ static void __exit exit_ceph(void)
1047{ 1050{
1048 dout("exit_ceph\n"); 1051 dout("exit_ceph\n");
1049 unregister_filesystem(&ceph_fs_type); 1052 unregister_filesystem(&ceph_fs_type);
1053 ceph_snap_exit();
1050 ceph_xattr_exit(); 1054 ceph_xattr_exit();
1051 destroy_caches(); 1055 destroy_caches();
1052} 1056}
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index b82f507979b8..e1aa32d0759d 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -161,6 +161,7 @@ struct ceph_cap_snap {
161 u64 time_warp_seq; 161 u64 time_warp_seq;
162 int writing; /* a sync write is still in progress */ 162 int writing; /* a sync write is still in progress */
163 int dirty_pages; /* dirty pages awaiting writeback */ 163 int dirty_pages; /* dirty pages awaiting writeback */
164 bool inline_data;
164}; 165};
165 166
166static inline void ceph_put_cap_snap(struct ceph_cap_snap *capsnap) 167static inline void ceph_put_cap_snap(struct ceph_cap_snap *capsnap)
@@ -253,9 +254,11 @@ struct ceph_inode_info {
253 spinlock_t i_ceph_lock; 254 spinlock_t i_ceph_lock;
254 255
255 u64 i_version; 256 u64 i_version;
257 u64 i_inline_version;
256 u32 i_time_warp_seq; 258 u32 i_time_warp_seq;
257 259
258 unsigned i_ceph_flags; 260 unsigned i_ceph_flags;
261 int i_ordered_count;
259 atomic_t i_release_count; 262 atomic_t i_release_count;
260 atomic_t i_complete_count; 263 atomic_t i_complete_count;
261 264
@@ -434,14 +437,19 @@ static inline struct inode *ceph_find_inode(struct super_block *sb,
434/* 437/*
435 * Ceph inode. 438 * Ceph inode.
436 */ 439 */
437#define CEPH_I_NODELAY 4 /* do not delay cap release */ 440#define CEPH_I_DIR_ORDERED 1 /* dentries in dir are ordered */
438#define CEPH_I_FLUSH 8 /* do not delay flush of dirty metadata */ 441#define CEPH_I_NODELAY 4 /* do not delay cap release */
439#define CEPH_I_NOFLUSH 16 /* do not flush dirty caps */ 442#define CEPH_I_FLUSH 8 /* do not delay flush of dirty metadata */
443#define CEPH_I_NOFLUSH 16 /* do not flush dirty caps */
440 444
441static inline void __ceph_dir_set_complete(struct ceph_inode_info *ci, 445static inline void __ceph_dir_set_complete(struct ceph_inode_info *ci,
442 int release_count) 446 int release_count, int ordered_count)
443{ 447{
444 atomic_set(&ci->i_complete_count, release_count); 448 atomic_set(&ci->i_complete_count, release_count);
449 if (ci->i_ordered_count == ordered_count)
450 ci->i_ceph_flags |= CEPH_I_DIR_ORDERED;
451 else
452 ci->i_ceph_flags &= ~CEPH_I_DIR_ORDERED;
445} 453}
446 454
447static inline void __ceph_dir_clear_complete(struct ceph_inode_info *ci) 455static inline void __ceph_dir_clear_complete(struct ceph_inode_info *ci)
@@ -455,16 +463,35 @@ static inline bool __ceph_dir_is_complete(struct ceph_inode_info *ci)
455 atomic_read(&ci->i_release_count); 463 atomic_read(&ci->i_release_count);
456} 464}
457 465
466static inline bool __ceph_dir_is_complete_ordered(struct ceph_inode_info *ci)
467{
468 return __ceph_dir_is_complete(ci) &&
469 (ci->i_ceph_flags & CEPH_I_DIR_ORDERED);
470}
471
458static inline void ceph_dir_clear_complete(struct inode *inode) 472static inline void ceph_dir_clear_complete(struct inode *inode)
459{ 473{
460 __ceph_dir_clear_complete(ceph_inode(inode)); 474 __ceph_dir_clear_complete(ceph_inode(inode));
461} 475}
462 476
463static inline bool ceph_dir_is_complete(struct inode *inode) 477static inline void ceph_dir_clear_ordered(struct inode *inode)
464{ 478{
465 return __ceph_dir_is_complete(ceph_inode(inode)); 479 struct ceph_inode_info *ci = ceph_inode(inode);
480 spin_lock(&ci->i_ceph_lock);
481 ci->i_ordered_count++;
482 ci->i_ceph_flags &= ~CEPH_I_DIR_ORDERED;
483 spin_unlock(&ci->i_ceph_lock);
466} 484}
467 485
486static inline bool ceph_dir_is_complete_ordered(struct inode *inode)
487{
488 struct ceph_inode_info *ci = ceph_inode(inode);
489 bool ret;
490 spin_lock(&ci->i_ceph_lock);
491 ret = __ceph_dir_is_complete_ordered(ci);
492 spin_unlock(&ci->i_ceph_lock);
493 return ret;
494}
468 495
469/* find a specific frag @f */ 496/* find a specific frag @f */
470extern struct ceph_inode_frag *__ceph_find_frag(struct ceph_inode_info *ci, 497extern struct ceph_inode_frag *__ceph_find_frag(struct ceph_inode_info *ci,
@@ -580,6 +607,7 @@ struct ceph_file_info {
580 char *last_name; /* last entry in previous chunk */ 607 char *last_name; /* last entry in previous chunk */
581 struct dentry *dentry; /* next dentry (for dcache readdir) */ 608 struct dentry *dentry; /* next dentry (for dcache readdir) */
582 int dir_release_count; 609 int dir_release_count;
610 int dir_ordered_count;
583 611
584 /* used for -o dirstat read() on directory thing */ 612 /* used for -o dirstat read() on directory thing */
585 char *dir_info; 613 char *dir_info;
@@ -673,6 +701,8 @@ extern void ceph_queue_cap_snap(struct ceph_inode_info *ci);
673extern int __ceph_finish_cap_snap(struct ceph_inode_info *ci, 701extern int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
674 struct ceph_cap_snap *capsnap); 702 struct ceph_cap_snap *capsnap);
675extern void ceph_cleanup_empty_realms(struct ceph_mds_client *mdsc); 703extern void ceph_cleanup_empty_realms(struct ceph_mds_client *mdsc);
704extern int ceph_snap_init(void);
705extern void ceph_snap_exit(void);
676 706
677/* 707/*
678 * a cap_snap is "pending" if it is still awaiting an in-progress 708 * a cap_snap is "pending" if it is still awaiting an in-progress
@@ -715,7 +745,12 @@ extern void ceph_queue_vmtruncate(struct inode *inode);
715extern void ceph_queue_invalidate(struct inode *inode); 745extern void ceph_queue_invalidate(struct inode *inode);
716extern void ceph_queue_writeback(struct inode *inode); 746extern void ceph_queue_writeback(struct inode *inode);
717 747
718extern int ceph_do_getattr(struct inode *inode, int mask, bool force); 748extern int __ceph_do_getattr(struct inode *inode, struct page *locked_page,
749 int mask, bool force);
750static inline int ceph_do_getattr(struct inode *inode, int mask, bool force)
751{
752 return __ceph_do_getattr(inode, NULL, mask, force);
753}
719extern int ceph_permission(struct inode *inode, int mask); 754extern int ceph_permission(struct inode *inode, int mask);
720extern int ceph_setattr(struct dentry *dentry, struct iattr *attr); 755extern int ceph_setattr(struct dentry *dentry, struct iattr *attr);
721extern int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry, 756extern int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry,
@@ -830,7 +865,7 @@ extern int ceph_encode_dentry_release(void **p, struct dentry *dn,
830 int mds, int drop, int unless); 865 int mds, int drop, int unless);
831 866
832extern int ceph_get_caps(struct ceph_inode_info *ci, int need, int want, 867extern int ceph_get_caps(struct ceph_inode_info *ci, int need, int want,
833 int *got, loff_t endoff); 868 loff_t endoff, int *got, struct page **pinned_page);
834 869
835/* for counting open files by mode */ 870/* for counting open files by mode */
836static inline void __ceph_get_fmode(struct ceph_inode_info *ci, int mode) 871static inline void __ceph_get_fmode(struct ceph_inode_info *ci, int mode)
@@ -852,7 +887,9 @@ extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
852 struct file *file, unsigned flags, umode_t mode, 887 struct file *file, unsigned flags, umode_t mode,
853 int *opened); 888 int *opened);
854extern int ceph_release(struct inode *inode, struct file *filp); 889extern int ceph_release(struct inode *inode, struct file *filp);
855 890extern void ceph_fill_inline_data(struct inode *inode, struct page *locked_page,
891 char *data, size_t len);
892int ceph_uninline_data(struct file *filp, struct page *locked_page);
856/* dir.c */ 893/* dir.c */
857extern const struct file_operations ceph_dir_fops; 894extern const struct file_operations ceph_dir_fops;
858extern const struct inode_operations ceph_dir_iops; 895extern const struct inode_operations ceph_dir_iops;
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 678b0d2bbbc4..5a492caf34cb 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -854,7 +854,7 @@ static int ceph_sync_setxattr(struct dentry *dentry, const char *name,
854 struct ceph_pagelist *pagelist = NULL; 854 struct ceph_pagelist *pagelist = NULL;
855 int err; 855 int err;
856 856
857 if (value) { 857 if (size > 0) {
858 /* copy value into pagelist */ 858 /* copy value into pagelist */
859 pagelist = kmalloc(sizeof(*pagelist), GFP_NOFS); 859 pagelist = kmalloc(sizeof(*pagelist), GFP_NOFS);
860 if (!pagelist) 860 if (!pagelist)
@@ -864,7 +864,7 @@ static int ceph_sync_setxattr(struct dentry *dentry, const char *name,
864 err = ceph_pagelist_append(pagelist, value, size); 864 err = ceph_pagelist_append(pagelist, value, size);
865 if (err) 865 if (err)
866 goto out; 866 goto out;
867 } else { 867 } else if (!value) {
868 flags |= CEPH_XATTR_REMOVE; 868 flags |= CEPH_XATTR_REMOVE;
869 } 869 }
870 870
@@ -1001,6 +1001,9 @@ int ceph_setxattr(struct dentry *dentry, const char *name,
1001 if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) 1001 if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
1002 return generic_setxattr(dentry, name, value, size, flags); 1002 return generic_setxattr(dentry, name, value, size, flags);
1003 1003
1004 if (size == 0)
1005 value = ""; /* empty EA, do not remove */
1006
1004 return __ceph_setxattr(dentry, name, value, size, flags); 1007 return __ceph_setxattr(dentry, name, value, size, flags);
1005} 1008}
1006 1009
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index c2d6604667b0..719e1ce1c609 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -1917,7 +1917,6 @@ ecryptfs_decode_from_filename(unsigned char *dst, size_t *dst_size,
1917 break; 1917 break;
1918 case 2: 1918 case 2:
1919 dst[dst_byte_offset++] |= (src_byte); 1919 dst[dst_byte_offset++] |= (src_byte);
1920 dst[dst_byte_offset] = 0;
1921 current_bit_offset = 0; 1920 current_bit_offset = 0;
1922 break; 1921 break;
1923 } 1922 }
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 80154ec4f8c2..6f4e659f508f 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -190,23 +190,11 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
190{ 190{
191 int rc = 0; 191 int rc = 0;
192 struct ecryptfs_crypt_stat *crypt_stat = NULL; 192 struct ecryptfs_crypt_stat *crypt_stat = NULL;
193 struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
194 struct dentry *ecryptfs_dentry = file->f_path.dentry; 193 struct dentry *ecryptfs_dentry = file->f_path.dentry;
195 /* Private value of ecryptfs_dentry allocated in 194 /* Private value of ecryptfs_dentry allocated in
196 * ecryptfs_lookup() */ 195 * ecryptfs_lookup() */
197 struct ecryptfs_file_info *file_info; 196 struct ecryptfs_file_info *file_info;
198 197
199 mount_crypt_stat = &ecryptfs_superblock_to_private(
200 ecryptfs_dentry->d_sb)->mount_crypt_stat;
201 if ((mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED)
202 && ((file->f_flags & O_WRONLY) || (file->f_flags & O_RDWR)
203 || (file->f_flags & O_CREAT) || (file->f_flags & O_TRUNC)
204 || (file->f_flags & O_APPEND))) {
205 printk(KERN_WARNING "Mount has encrypted view enabled; "
206 "files may only be read\n");
207 rc = -EPERM;
208 goto out;
209 }
210 /* Released in ecryptfs_release or end of function if failure */ 198 /* Released in ecryptfs_release or end of function if failure */
211 file_info = kmem_cache_zalloc(ecryptfs_file_info_cache, GFP_KERNEL); 199 file_info = kmem_cache_zalloc(ecryptfs_file_info_cache, GFP_KERNEL);
212 ecryptfs_set_file_private(file, file_info); 200 ecryptfs_set_file_private(file, file_info);
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index 635e8e16a5b7..917bd5c9776a 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -100,12 +100,12 @@ int ecryptfs_parse_packet_length(unsigned char *data, size_t *size,
100 (*size) = 0; 100 (*size) = 0;
101 if (data[0] < 192) { 101 if (data[0] < 192) {
102 /* One-byte length */ 102 /* One-byte length */
103 (*size) = (unsigned char)data[0]; 103 (*size) = data[0];
104 (*length_size) = 1; 104 (*length_size) = 1;
105 } else if (data[0] < 224) { 105 } else if (data[0] < 224) {
106 /* Two-byte length */ 106 /* Two-byte length */
107 (*size) = (((unsigned char)(data[0]) - 192) * 256); 107 (*size) = (data[0] - 192) * 256;
108 (*size) += ((unsigned char)(data[1]) + 192); 108 (*size) += data[1] + 192;
109 (*length_size) = 2; 109 (*length_size) = 2;
110 } else if (data[0] == 255) { 110 } else if (data[0] == 255) {
111 /* If support is added, adjust ECRYPTFS_MAX_PKT_LEN_SIZE */ 111 /* If support is added, adjust ECRYPTFS_MAX_PKT_LEN_SIZE */
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index c4cd1fd86cc2..d9eb84bda559 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -493,6 +493,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
493{ 493{
494 struct super_block *s; 494 struct super_block *s;
495 struct ecryptfs_sb_info *sbi; 495 struct ecryptfs_sb_info *sbi;
496 struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
496 struct ecryptfs_dentry_info *root_info; 497 struct ecryptfs_dentry_info *root_info;
497 const char *err = "Getting sb failed"; 498 const char *err = "Getting sb failed";
498 struct inode *inode; 499 struct inode *inode;
@@ -511,6 +512,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
511 err = "Error parsing options"; 512 err = "Error parsing options";
512 goto out; 513 goto out;
513 } 514 }
515 mount_crypt_stat = &sbi->mount_crypt_stat;
514 516
515 s = sget(fs_type, NULL, set_anon_super, flags, NULL); 517 s = sget(fs_type, NULL, set_anon_super, flags, NULL);
516 if (IS_ERR(s)) { 518 if (IS_ERR(s)) {
@@ -557,11 +559,19 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
557 559
558 /** 560 /**
559 * Set the POSIX ACL flag based on whether they're enabled in the lower 561 * Set the POSIX ACL flag based on whether they're enabled in the lower
560 * mount. Force a read-only eCryptfs mount if the lower mount is ro. 562 * mount.
561 * Allow a ro eCryptfs mount even when the lower mount is rw.
562 */ 563 */
563 s->s_flags = flags & ~MS_POSIXACL; 564 s->s_flags = flags & ~MS_POSIXACL;
564 s->s_flags |= path.dentry->d_sb->s_flags & (MS_RDONLY | MS_POSIXACL); 565 s->s_flags |= path.dentry->d_sb->s_flags & MS_POSIXACL;
566
567 /**
568 * Force a read-only eCryptfs mount when:
569 * 1) The lower mount is ro
570 * 2) The ecryptfs_encrypted_view mount option is specified
571 */
572 if (path.dentry->d_sb->s_flags & MS_RDONLY ||
573 mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED)
574 s->s_flags |= MS_RDONLY;
565 575
566 s->s_maxbytes = path.dentry->d_sb->s_maxbytes; 576 s->s_maxbytes = path.dentry->d_sb->s_maxbytes;
567 s->s_blocksize = path.dentry->d_sb->s_blocksize; 577 s->s_blocksize = path.dentry->d_sb->s_blocksize;
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 503ea15dc5db..370420bfae8d 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -267,7 +267,6 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
267 handle_t *handle; 267 handle_t *handle;
268 ext4_lblk_t orig_blk_offset, donor_blk_offset; 268 ext4_lblk_t orig_blk_offset, donor_blk_offset;
269 unsigned long blocksize = orig_inode->i_sb->s_blocksize; 269 unsigned long blocksize = orig_inode->i_sb->s_blocksize;
270 unsigned int w_flags = 0;
271 unsigned int tmp_data_size, data_size, replaced_size; 270 unsigned int tmp_data_size, data_size, replaced_size;
272 int err2, jblocks, retries = 0; 271 int err2, jblocks, retries = 0;
273 int replaced_count = 0; 272 int replaced_count = 0;
@@ -288,9 +287,6 @@ again:
288 return 0; 287 return 0;
289 } 288 }
290 289
291 if (segment_eq(get_fs(), KERNEL_DS))
292 w_flags |= AOP_FLAG_UNINTERRUPTIBLE;
293
294 orig_blk_offset = orig_page_offset * blocks_per_page + 290 orig_blk_offset = orig_page_offset * blocks_per_page +
295 data_offset_in_page; 291 data_offset_in_page;
296 292
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
index 966ace8b243f..28d0c7abba1c 100644
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c
@@ -415,7 +415,7 @@ err_unlock:
415err_region: 415err_region:
416 unregister_chrdev_region(devt, 1); 416 unregister_chrdev_region(devt, 1);
417err: 417err:
418 fuse_conn_kill(fc); 418 fuse_abort_conn(fc);
419 goto out; 419 goto out;
420} 420}
421 421
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index ca887314aba9..ba1107977f2e 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -511,6 +511,35 @@ void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
511} 511}
512EXPORT_SYMBOL_GPL(fuse_request_send); 512EXPORT_SYMBOL_GPL(fuse_request_send);
513 513
514ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args)
515{
516 struct fuse_req *req;
517 ssize_t ret;
518
519 req = fuse_get_req(fc, 0);
520 if (IS_ERR(req))
521 return PTR_ERR(req);
522
523 req->in.h.opcode = args->in.h.opcode;
524 req->in.h.nodeid = args->in.h.nodeid;
525 req->in.numargs = args->in.numargs;
526 memcpy(req->in.args, args->in.args,
527 args->in.numargs * sizeof(struct fuse_in_arg));
528 req->out.argvar = args->out.argvar;
529 req->out.numargs = args->out.numargs;
530 memcpy(req->out.args, args->out.args,
531 args->out.numargs * sizeof(struct fuse_arg));
532 fuse_request_send(fc, req);
533 ret = req->out.h.error;
534 if (!ret && args->out.argvar) {
535 BUG_ON(args->out.numargs != 1);
536 ret = req->out.args[0].size;
537 }
538 fuse_put_request(fc, req);
539
540 return ret;
541}
542
514static void fuse_request_send_nowait_locked(struct fuse_conn *fc, 543static void fuse_request_send_nowait_locked(struct fuse_conn *fc,
515 struct fuse_req *req) 544 struct fuse_req *req)
516{ 545{
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index df562cc87763..252b8a5de8b5 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -145,22 +145,22 @@ static void fuse_invalidate_entry(struct dentry *entry)
145 fuse_invalidate_entry_cache(entry); 145 fuse_invalidate_entry_cache(entry);
146} 146}
147 147
148static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_req *req, 148static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_args *args,
149 u64 nodeid, struct qstr *name, 149 u64 nodeid, struct qstr *name,
150 struct fuse_entry_out *outarg) 150 struct fuse_entry_out *outarg)
151{ 151{
152 memset(outarg, 0, sizeof(struct fuse_entry_out)); 152 memset(outarg, 0, sizeof(struct fuse_entry_out));
153 req->in.h.opcode = FUSE_LOOKUP; 153 args->in.h.opcode = FUSE_LOOKUP;
154 req->in.h.nodeid = nodeid; 154 args->in.h.nodeid = nodeid;
155 req->in.numargs = 1; 155 args->in.numargs = 1;
156 req->in.args[0].size = name->len + 1; 156 args->in.args[0].size = name->len + 1;
157 req->in.args[0].value = name->name; 157 args->in.args[0].value = name->name;
158 req->out.numargs = 1; 158 args->out.numargs = 1;
159 if (fc->minor < 9) 159 if (fc->minor < 9)
160 req->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE; 160 args->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
161 else 161 else
162 req->out.args[0].size = sizeof(struct fuse_entry_out); 162 args->out.args[0].size = sizeof(struct fuse_entry_out);
163 req->out.args[0].value = outarg; 163 args->out.args[0].value = outarg;
164} 164}
165 165
166u64 fuse_get_attr_version(struct fuse_conn *fc) 166u64 fuse_get_attr_version(struct fuse_conn *fc)
@@ -200,9 +200,8 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
200 goto invalid; 200 goto invalid;
201 else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) || 201 else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) ||
202 (flags & LOOKUP_REVAL)) { 202 (flags & LOOKUP_REVAL)) {
203 int err;
204 struct fuse_entry_out outarg; 203 struct fuse_entry_out outarg;
205 struct fuse_req *req; 204 FUSE_ARGS(args);
206 struct fuse_forget_link *forget; 205 struct fuse_forget_link *forget;
207 u64 attr_version; 206 u64 attr_version;
208 207
@@ -215,31 +214,23 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
215 goto out; 214 goto out;
216 215
217 fc = get_fuse_conn(inode); 216 fc = get_fuse_conn(inode);
218 req = fuse_get_req_nopages(fc);
219 ret = PTR_ERR(req);
220 if (IS_ERR(req))
221 goto out;
222 217
223 forget = fuse_alloc_forget(); 218 forget = fuse_alloc_forget();
224 if (!forget) { 219 ret = -ENOMEM;
225 fuse_put_request(fc, req); 220 if (!forget)
226 ret = -ENOMEM;
227 goto out; 221 goto out;
228 }
229 222
230 attr_version = fuse_get_attr_version(fc); 223 attr_version = fuse_get_attr_version(fc);
231 224
232 parent = dget_parent(entry); 225 parent = dget_parent(entry);
233 fuse_lookup_init(fc, req, get_node_id(parent->d_inode), 226 fuse_lookup_init(fc, &args, get_node_id(parent->d_inode),
234 &entry->d_name, &outarg); 227 &entry->d_name, &outarg);
235 fuse_request_send(fc, req); 228 ret = fuse_simple_request(fc, &args);
236 dput(parent); 229 dput(parent);
237 err = req->out.h.error;
238 fuse_put_request(fc, req);
239 /* Zero nodeid is same as -ENOENT */ 230 /* Zero nodeid is same as -ENOENT */
240 if (!err && !outarg.nodeid) 231 if (!ret && !outarg.nodeid)
241 err = -ENOENT; 232 ret = -ENOENT;
242 if (!err) { 233 if (!ret) {
243 fi = get_fuse_inode(inode); 234 fi = get_fuse_inode(inode);
244 if (outarg.nodeid != get_node_id(inode)) { 235 if (outarg.nodeid != get_node_id(inode)) {
245 fuse_queue_forget(fc, forget, outarg.nodeid, 1); 236 fuse_queue_forget(fc, forget, outarg.nodeid, 1);
@@ -250,7 +241,9 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
250 spin_unlock(&fc->lock); 241 spin_unlock(&fc->lock);
251 } 242 }
252 kfree(forget); 243 kfree(forget);
253 if (err || (outarg.attr.mode ^ inode->i_mode) & S_IFMT) 244 if (ret == -ENOMEM)
245 goto out;
246 if (ret || (outarg.attr.mode ^ inode->i_mode) & S_IFMT)
254 goto invalid; 247 goto invalid;
255 248
256 fuse_change_attributes(inode, &outarg.attr, 249 fuse_change_attributes(inode, &outarg.attr,
@@ -296,7 +289,7 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
296 struct fuse_entry_out *outarg, struct inode **inode) 289 struct fuse_entry_out *outarg, struct inode **inode)
297{ 290{
298 struct fuse_conn *fc = get_fuse_conn_super(sb); 291 struct fuse_conn *fc = get_fuse_conn_super(sb);
299 struct fuse_req *req; 292 FUSE_ARGS(args);
300 struct fuse_forget_link *forget; 293 struct fuse_forget_link *forget;
301 u64 attr_version; 294 u64 attr_version;
302 int err; 295 int err;
@@ -306,24 +299,16 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
306 if (name->len > FUSE_NAME_MAX) 299 if (name->len > FUSE_NAME_MAX)
307 goto out; 300 goto out;
308 301
309 req = fuse_get_req_nopages(fc);
310 err = PTR_ERR(req);
311 if (IS_ERR(req))
312 goto out;
313 302
314 forget = fuse_alloc_forget(); 303 forget = fuse_alloc_forget();
315 err = -ENOMEM; 304 err = -ENOMEM;
316 if (!forget) { 305 if (!forget)
317 fuse_put_request(fc, req);
318 goto out; 306 goto out;
319 }
320 307
321 attr_version = fuse_get_attr_version(fc); 308 attr_version = fuse_get_attr_version(fc);
322 309
323 fuse_lookup_init(fc, req, nodeid, name, outarg); 310 fuse_lookup_init(fc, &args, nodeid, name, outarg);
324 fuse_request_send(fc, req); 311 err = fuse_simple_request(fc, &args);
325 err = req->out.h.error;
326 fuse_put_request(fc, req);
327 /* Zero nodeid is same as -ENOENT, but with valid timeout */ 312 /* Zero nodeid is same as -ENOENT, but with valid timeout */
328 if (err || !outarg->nodeid) 313 if (err || !outarg->nodeid)
329 goto out_put_forget; 314 goto out_put_forget;
@@ -405,7 +390,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
405 int err; 390 int err;
406 struct inode *inode; 391 struct inode *inode;
407 struct fuse_conn *fc = get_fuse_conn(dir); 392 struct fuse_conn *fc = get_fuse_conn(dir);
408 struct fuse_req *req; 393 FUSE_ARGS(args);
409 struct fuse_forget_link *forget; 394 struct fuse_forget_link *forget;
410 struct fuse_create_in inarg; 395 struct fuse_create_in inarg;
411 struct fuse_open_out outopen; 396 struct fuse_open_out outopen;
@@ -420,15 +405,10 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
420 if (!forget) 405 if (!forget)
421 goto out_err; 406 goto out_err;
422 407
423 req = fuse_get_req_nopages(fc);
424 err = PTR_ERR(req);
425 if (IS_ERR(req))
426 goto out_put_forget_req;
427
428 err = -ENOMEM; 408 err = -ENOMEM;
429 ff = fuse_file_alloc(fc); 409 ff = fuse_file_alloc(fc);
430 if (!ff) 410 if (!ff)
431 goto out_put_request; 411 goto out_put_forget_req;
432 412
433 if (!fc->dont_mask) 413 if (!fc->dont_mask)
434 mode &= ~current_umask(); 414 mode &= ~current_umask();
@@ -439,24 +419,23 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
439 inarg.flags = flags; 419 inarg.flags = flags;
440 inarg.mode = mode; 420 inarg.mode = mode;
441 inarg.umask = current_umask(); 421 inarg.umask = current_umask();
442 req->in.h.opcode = FUSE_CREATE; 422 args.in.h.opcode = FUSE_CREATE;
443 req->in.h.nodeid = get_node_id(dir); 423 args.in.h.nodeid = get_node_id(dir);
444 req->in.numargs = 2; 424 args.in.numargs = 2;
445 req->in.args[0].size = fc->minor < 12 ? sizeof(struct fuse_open_in) : 425 args.in.args[0].size = fc->minor < 12 ? sizeof(struct fuse_open_in) :
446 sizeof(inarg); 426 sizeof(inarg);
447 req->in.args[0].value = &inarg; 427 args.in.args[0].value = &inarg;
448 req->in.args[1].size = entry->d_name.len + 1; 428 args.in.args[1].size = entry->d_name.len + 1;
449 req->in.args[1].value = entry->d_name.name; 429 args.in.args[1].value = entry->d_name.name;
450 req->out.numargs = 2; 430 args.out.numargs = 2;
451 if (fc->minor < 9) 431 if (fc->minor < 9)
452 req->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE; 432 args.out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
453 else 433 else
454 req->out.args[0].size = sizeof(outentry); 434 args.out.args[0].size = sizeof(outentry);
455 req->out.args[0].value = &outentry; 435 args.out.args[0].value = &outentry;
456 req->out.args[1].size = sizeof(outopen); 436 args.out.args[1].size = sizeof(outopen);
457 req->out.args[1].value = &outopen; 437 args.out.args[1].value = &outopen;
458 fuse_request_send(fc, req); 438 err = fuse_simple_request(fc, &args);
459 err = req->out.h.error;
460 if (err) 439 if (err)
461 goto out_free_ff; 440 goto out_free_ff;
462 441
@@ -464,7 +443,6 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
464 if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid)) 443 if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid))
465 goto out_free_ff; 444 goto out_free_ff;
466 445
467 fuse_put_request(fc, req);
468 ff->fh = outopen.fh; 446 ff->fh = outopen.fh;
469 ff->nodeid = outentry.nodeid; 447 ff->nodeid = outentry.nodeid;
470 ff->open_flags = outopen.open_flags; 448 ff->open_flags = outopen.open_flags;
@@ -492,8 +470,6 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
492 470
493out_free_ff: 471out_free_ff:
494 fuse_file_free(ff); 472 fuse_file_free(ff);
495out_put_request:
496 fuse_put_request(fc, req);
497out_put_forget_req: 473out_put_forget_req:
498 kfree(forget); 474 kfree(forget);
499out_err: 475out_err:
@@ -547,7 +523,7 @@ no_open:
547/* 523/*
548 * Code shared between mknod, mkdir, symlink and link 524 * Code shared between mknod, mkdir, symlink and link
549 */ 525 */
550static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req, 526static int create_new_entry(struct fuse_conn *fc, struct fuse_args *args,
551 struct inode *dir, struct dentry *entry, 527 struct inode *dir, struct dentry *entry,
552 umode_t mode) 528 umode_t mode)
553{ 529{
@@ -557,22 +533,18 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req,
557 struct fuse_forget_link *forget; 533 struct fuse_forget_link *forget;
558 534
559 forget = fuse_alloc_forget(); 535 forget = fuse_alloc_forget();
560 if (!forget) { 536 if (!forget)
561 fuse_put_request(fc, req);
562 return -ENOMEM; 537 return -ENOMEM;
563 }
564 538
565 memset(&outarg, 0, sizeof(outarg)); 539 memset(&outarg, 0, sizeof(outarg));
566 req->in.h.nodeid = get_node_id(dir); 540 args->in.h.nodeid = get_node_id(dir);
567 req->out.numargs = 1; 541 args->out.numargs = 1;
568 if (fc->minor < 9) 542 if (fc->minor < 9)
569 req->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE; 543 args->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
570 else 544 else
571 req->out.args[0].size = sizeof(outarg); 545 args->out.args[0].size = sizeof(outarg);
572 req->out.args[0].value = &outarg; 546 args->out.args[0].value = &outarg;
573 fuse_request_send(fc, req); 547 err = fuse_simple_request(fc, args);
574 err = req->out.h.error;
575 fuse_put_request(fc, req);
576 if (err) 548 if (err)
577 goto out_put_forget_req; 549 goto out_put_forget_req;
578 550
@@ -609,9 +581,7 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode,
609{ 581{
610 struct fuse_mknod_in inarg; 582 struct fuse_mknod_in inarg;
611 struct fuse_conn *fc = get_fuse_conn(dir); 583 struct fuse_conn *fc = get_fuse_conn(dir);
612 struct fuse_req *req = fuse_get_req_nopages(fc); 584 FUSE_ARGS(args);
613 if (IS_ERR(req))
614 return PTR_ERR(req);
615 585
616 if (!fc->dont_mask) 586 if (!fc->dont_mask)
617 mode &= ~current_umask(); 587 mode &= ~current_umask();
@@ -620,14 +590,14 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode,
620 inarg.mode = mode; 590 inarg.mode = mode;
621 inarg.rdev = new_encode_dev(rdev); 591 inarg.rdev = new_encode_dev(rdev);
622 inarg.umask = current_umask(); 592 inarg.umask = current_umask();
623 req->in.h.opcode = FUSE_MKNOD; 593 args.in.h.opcode = FUSE_MKNOD;
624 req->in.numargs = 2; 594 args.in.numargs = 2;
625 req->in.args[0].size = fc->minor < 12 ? FUSE_COMPAT_MKNOD_IN_SIZE : 595 args.in.args[0].size = fc->minor < 12 ? FUSE_COMPAT_MKNOD_IN_SIZE :
626 sizeof(inarg); 596 sizeof(inarg);
627 req->in.args[0].value = &inarg; 597 args.in.args[0].value = &inarg;
628 req->in.args[1].size = entry->d_name.len + 1; 598 args.in.args[1].size = entry->d_name.len + 1;
629 req->in.args[1].value = entry->d_name.name; 599 args.in.args[1].value = entry->d_name.name;
630 return create_new_entry(fc, req, dir, entry, mode); 600 return create_new_entry(fc, &args, dir, entry, mode);
631} 601}
632 602
633static int fuse_create(struct inode *dir, struct dentry *entry, umode_t mode, 603static int fuse_create(struct inode *dir, struct dentry *entry, umode_t mode,
@@ -640,9 +610,7 @@ static int fuse_mkdir(struct inode *dir, struct dentry *entry, umode_t mode)
640{ 610{
641 struct fuse_mkdir_in inarg; 611 struct fuse_mkdir_in inarg;
642 struct fuse_conn *fc = get_fuse_conn(dir); 612 struct fuse_conn *fc = get_fuse_conn(dir);
643 struct fuse_req *req = fuse_get_req_nopages(fc); 613 FUSE_ARGS(args);
644 if (IS_ERR(req))
645 return PTR_ERR(req);
646 614
647 if (!fc->dont_mask) 615 if (!fc->dont_mask)
648 mode &= ~current_umask(); 616 mode &= ~current_umask();
@@ -650,13 +618,13 @@ static int fuse_mkdir(struct inode *dir, struct dentry *entry, umode_t mode)
650 memset(&inarg, 0, sizeof(inarg)); 618 memset(&inarg, 0, sizeof(inarg));
651 inarg.mode = mode; 619 inarg.mode = mode;
652 inarg.umask = current_umask(); 620 inarg.umask = current_umask();
653 req->in.h.opcode = FUSE_MKDIR; 621 args.in.h.opcode = FUSE_MKDIR;
654 req->in.numargs = 2; 622 args.in.numargs = 2;
655 req->in.args[0].size = sizeof(inarg); 623 args.in.args[0].size = sizeof(inarg);
656 req->in.args[0].value = &inarg; 624 args.in.args[0].value = &inarg;
657 req->in.args[1].size = entry->d_name.len + 1; 625 args.in.args[1].size = entry->d_name.len + 1;
658 req->in.args[1].value = entry->d_name.name; 626 args.in.args[1].value = entry->d_name.name;
659 return create_new_entry(fc, req, dir, entry, S_IFDIR); 627 return create_new_entry(fc, &args, dir, entry, S_IFDIR);
660} 628}
661 629
662static int fuse_symlink(struct inode *dir, struct dentry *entry, 630static int fuse_symlink(struct inode *dir, struct dentry *entry,
@@ -664,17 +632,15 @@ static int fuse_symlink(struct inode *dir, struct dentry *entry,
664{ 632{
665 struct fuse_conn *fc = get_fuse_conn(dir); 633 struct fuse_conn *fc = get_fuse_conn(dir);
666 unsigned len = strlen(link) + 1; 634 unsigned len = strlen(link) + 1;
667 struct fuse_req *req = fuse_get_req_nopages(fc); 635 FUSE_ARGS(args);
668 if (IS_ERR(req))
669 return PTR_ERR(req);
670 636
671 req->in.h.opcode = FUSE_SYMLINK; 637 args.in.h.opcode = FUSE_SYMLINK;
672 req->in.numargs = 2; 638 args.in.numargs = 2;
673 req->in.args[0].size = entry->d_name.len + 1; 639 args.in.args[0].size = entry->d_name.len + 1;
674 req->in.args[0].value = entry->d_name.name; 640 args.in.args[0].value = entry->d_name.name;
675 req->in.args[1].size = len; 641 args.in.args[1].size = len;
676 req->in.args[1].value = link; 642 args.in.args[1].value = link;
677 return create_new_entry(fc, req, dir, entry, S_IFLNK); 643 return create_new_entry(fc, &args, dir, entry, S_IFLNK);
678} 644}
679 645
680static inline void fuse_update_ctime(struct inode *inode) 646static inline void fuse_update_ctime(struct inode *inode)
@@ -689,18 +655,14 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry)
689{ 655{
690 int err; 656 int err;
691 struct fuse_conn *fc = get_fuse_conn(dir); 657 struct fuse_conn *fc = get_fuse_conn(dir);
692 struct fuse_req *req = fuse_get_req_nopages(fc); 658 FUSE_ARGS(args);
693 if (IS_ERR(req)) 659
694 return PTR_ERR(req); 660 args.in.h.opcode = FUSE_UNLINK;
695 661 args.in.h.nodeid = get_node_id(dir);
696 req->in.h.opcode = FUSE_UNLINK; 662 args.in.numargs = 1;
697 req->in.h.nodeid = get_node_id(dir); 663 args.in.args[0].size = entry->d_name.len + 1;
698 req->in.numargs = 1; 664 args.in.args[0].value = entry->d_name.name;
699 req->in.args[0].size = entry->d_name.len + 1; 665 err = fuse_simple_request(fc, &args);
700 req->in.args[0].value = entry->d_name.name;
701 fuse_request_send(fc, req);
702 err = req->out.h.error;
703 fuse_put_request(fc, req);
704 if (!err) { 666 if (!err) {
705 struct inode *inode = entry->d_inode; 667 struct inode *inode = entry->d_inode;
706 struct fuse_inode *fi = get_fuse_inode(inode); 668 struct fuse_inode *fi = get_fuse_inode(inode);
@@ -729,18 +691,14 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry)
729{ 691{
730 int err; 692 int err;
731 struct fuse_conn *fc = get_fuse_conn(dir); 693 struct fuse_conn *fc = get_fuse_conn(dir);
732 struct fuse_req *req = fuse_get_req_nopages(fc); 694 FUSE_ARGS(args);
733 if (IS_ERR(req)) 695
734 return PTR_ERR(req); 696 args.in.h.opcode = FUSE_RMDIR;
735 697 args.in.h.nodeid = get_node_id(dir);
736 req->in.h.opcode = FUSE_RMDIR; 698 args.in.numargs = 1;
737 req->in.h.nodeid = get_node_id(dir); 699 args.in.args[0].size = entry->d_name.len + 1;
738 req->in.numargs = 1; 700 args.in.args[0].value = entry->d_name.name;
739 req->in.args[0].size = entry->d_name.len + 1; 701 err = fuse_simple_request(fc, &args);
740 req->in.args[0].value = entry->d_name.name;
741 fuse_request_send(fc, req);
742 err = req->out.h.error;
743 fuse_put_request(fc, req);
744 if (!err) { 702 if (!err) {
745 clear_nlink(entry->d_inode); 703 clear_nlink(entry->d_inode);
746 fuse_invalidate_attr(dir); 704 fuse_invalidate_attr(dir);
@@ -757,27 +715,21 @@ static int fuse_rename_common(struct inode *olddir, struct dentry *oldent,
757 int err; 715 int err;
758 struct fuse_rename2_in inarg; 716 struct fuse_rename2_in inarg;
759 struct fuse_conn *fc = get_fuse_conn(olddir); 717 struct fuse_conn *fc = get_fuse_conn(olddir);
760 struct fuse_req *req; 718 FUSE_ARGS(args);
761
762 req = fuse_get_req_nopages(fc);
763 if (IS_ERR(req))
764 return PTR_ERR(req);
765 719
766 memset(&inarg, 0, argsize); 720 memset(&inarg, 0, argsize);
767 inarg.newdir = get_node_id(newdir); 721 inarg.newdir = get_node_id(newdir);
768 inarg.flags = flags; 722 inarg.flags = flags;
769 req->in.h.opcode = opcode; 723 args.in.h.opcode = opcode;
770 req->in.h.nodeid = get_node_id(olddir); 724 args.in.h.nodeid = get_node_id(olddir);
771 req->in.numargs = 3; 725 args.in.numargs = 3;
772 req->in.args[0].size = argsize; 726 args.in.args[0].size = argsize;
773 req->in.args[0].value = &inarg; 727 args.in.args[0].value = &inarg;
774 req->in.args[1].size = oldent->d_name.len + 1; 728 args.in.args[1].size = oldent->d_name.len + 1;
775 req->in.args[1].value = oldent->d_name.name; 729 args.in.args[1].value = oldent->d_name.name;
776 req->in.args[2].size = newent->d_name.len + 1; 730 args.in.args[2].size = newent->d_name.len + 1;
777 req->in.args[2].value = newent->d_name.name; 731 args.in.args[2].value = newent->d_name.name;
778 fuse_request_send(fc, req); 732 err = fuse_simple_request(fc, &args);
779 err = req->out.h.error;
780 fuse_put_request(fc, req);
781 if (!err) { 733 if (!err) {
782 /* ctime changes */ 734 /* ctime changes */
783 fuse_invalidate_attr(oldent->d_inode); 735 fuse_invalidate_attr(oldent->d_inode);
@@ -849,19 +801,17 @@ static int fuse_link(struct dentry *entry, struct inode *newdir,
849 struct fuse_link_in inarg; 801 struct fuse_link_in inarg;
850 struct inode *inode = entry->d_inode; 802 struct inode *inode = entry->d_inode;
851 struct fuse_conn *fc = get_fuse_conn(inode); 803 struct fuse_conn *fc = get_fuse_conn(inode);
852 struct fuse_req *req = fuse_get_req_nopages(fc); 804 FUSE_ARGS(args);
853 if (IS_ERR(req))
854 return PTR_ERR(req);
855 805
856 memset(&inarg, 0, sizeof(inarg)); 806 memset(&inarg, 0, sizeof(inarg));
857 inarg.oldnodeid = get_node_id(inode); 807 inarg.oldnodeid = get_node_id(inode);
858 req->in.h.opcode = FUSE_LINK; 808 args.in.h.opcode = FUSE_LINK;
859 req->in.numargs = 2; 809 args.in.numargs = 2;
860 req->in.args[0].size = sizeof(inarg); 810 args.in.args[0].size = sizeof(inarg);
861 req->in.args[0].value = &inarg; 811 args.in.args[0].value = &inarg;
862 req->in.args[1].size = newent->d_name.len + 1; 812 args.in.args[1].size = newent->d_name.len + 1;
863 req->in.args[1].value = newent->d_name.name; 813 args.in.args[1].value = newent->d_name.name;
864 err = create_new_entry(fc, req, newdir, newent, inode->i_mode); 814 err = create_new_entry(fc, &args, newdir, newent, inode->i_mode);
865 /* Contrary to "normal" filesystems it can happen that link 815 /* Contrary to "normal" filesystems it can happen that link
866 makes two "logical" inodes point to the same "physical" 816 makes two "logical" inodes point to the same "physical"
867 inode. We invalidate the attributes of the old one, so it 817 inode. We invalidate the attributes of the old one, so it
@@ -929,13 +879,9 @@ static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
929 struct fuse_getattr_in inarg; 879 struct fuse_getattr_in inarg;
930 struct fuse_attr_out outarg; 880 struct fuse_attr_out outarg;
931 struct fuse_conn *fc = get_fuse_conn(inode); 881 struct fuse_conn *fc = get_fuse_conn(inode);
932 struct fuse_req *req; 882 FUSE_ARGS(args);
933 u64 attr_version; 883 u64 attr_version;
934 884
935 req = fuse_get_req_nopages(fc);
936 if (IS_ERR(req))
937 return PTR_ERR(req);
938
939 attr_version = fuse_get_attr_version(fc); 885 attr_version = fuse_get_attr_version(fc);
940 886
941 memset(&inarg, 0, sizeof(inarg)); 887 memset(&inarg, 0, sizeof(inarg));
@@ -947,20 +893,18 @@ static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
947 inarg.getattr_flags |= FUSE_GETATTR_FH; 893 inarg.getattr_flags |= FUSE_GETATTR_FH;
948 inarg.fh = ff->fh; 894 inarg.fh = ff->fh;
949 } 895 }
950 req->in.h.opcode = FUSE_GETATTR; 896 args.in.h.opcode = FUSE_GETATTR;
951 req->in.h.nodeid = get_node_id(inode); 897 args.in.h.nodeid = get_node_id(inode);
952 req->in.numargs = 1; 898 args.in.numargs = 1;
953 req->in.args[0].size = sizeof(inarg); 899 args.in.args[0].size = sizeof(inarg);
954 req->in.args[0].value = &inarg; 900 args.in.args[0].value = &inarg;
955 req->out.numargs = 1; 901 args.out.numargs = 1;
956 if (fc->minor < 9) 902 if (fc->minor < 9)
957 req->out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE; 903 args.out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE;
958 else 904 else
959 req->out.args[0].size = sizeof(outarg); 905 args.out.args[0].size = sizeof(outarg);
960 req->out.args[0].value = &outarg; 906 args.out.args[0].value = &outarg;
961 fuse_request_send(fc, req); 907 err = fuse_simple_request(fc, &args);
962 err = req->out.h.error;
963 fuse_put_request(fc, req);
964 if (!err) { 908 if (!err) {
965 if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) { 909 if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
966 make_bad_inode(inode); 910 make_bad_inode(inode);
@@ -1102,7 +1046,7 @@ int fuse_allow_current_process(struct fuse_conn *fc)
1102static int fuse_access(struct inode *inode, int mask) 1046static int fuse_access(struct inode *inode, int mask)
1103{ 1047{
1104 struct fuse_conn *fc = get_fuse_conn(inode); 1048 struct fuse_conn *fc = get_fuse_conn(inode);
1105 struct fuse_req *req; 1049 FUSE_ARGS(args);
1106 struct fuse_access_in inarg; 1050 struct fuse_access_in inarg;
1107 int err; 1051 int err;
1108 1052
@@ -1111,20 +1055,14 @@ static int fuse_access(struct inode *inode, int mask)
1111 if (fc->no_access) 1055 if (fc->no_access)
1112 return 0; 1056 return 0;
1113 1057
1114 req = fuse_get_req_nopages(fc);
1115 if (IS_ERR(req))
1116 return PTR_ERR(req);
1117
1118 memset(&inarg, 0, sizeof(inarg)); 1058 memset(&inarg, 0, sizeof(inarg));
1119 inarg.mask = mask & (MAY_READ | MAY_WRITE | MAY_EXEC); 1059 inarg.mask = mask & (MAY_READ | MAY_WRITE | MAY_EXEC);
1120 req->in.h.opcode = FUSE_ACCESS; 1060 args.in.h.opcode = FUSE_ACCESS;
1121 req->in.h.nodeid = get_node_id(inode); 1061 args.in.h.nodeid = get_node_id(inode);
1122 req->in.numargs = 1; 1062 args.in.numargs = 1;
1123 req->in.args[0].size = sizeof(inarg); 1063 args.in.args[0].size = sizeof(inarg);
1124 req->in.args[0].value = &inarg; 1064 args.in.args[0].value = &inarg;
1125 fuse_request_send(fc, req); 1065 err = fuse_simple_request(fc, &args);
1126 err = req->out.h.error;
1127 fuse_put_request(fc, req);
1128 if (err == -ENOSYS) { 1066 if (err == -ENOSYS) {
1129 fc->no_access = 1; 1067 fc->no_access = 1;
1130 err = 0; 1068 err = 0;
@@ -1445,31 +1383,27 @@ static char *read_link(struct dentry *dentry)
1445{ 1383{
1446 struct inode *inode = dentry->d_inode; 1384 struct inode *inode = dentry->d_inode;
1447 struct fuse_conn *fc = get_fuse_conn(inode); 1385 struct fuse_conn *fc = get_fuse_conn(inode);
1448 struct fuse_req *req = fuse_get_req_nopages(fc); 1386 FUSE_ARGS(args);
1449 char *link; 1387 char *link;
1450 1388 ssize_t ret;
1451 if (IS_ERR(req))
1452 return ERR_CAST(req);
1453 1389
1454 link = (char *) __get_free_page(GFP_KERNEL); 1390 link = (char *) __get_free_page(GFP_KERNEL);
1455 if (!link) { 1391 if (!link)
1456 link = ERR_PTR(-ENOMEM); 1392 return ERR_PTR(-ENOMEM);
1457 goto out; 1393
1458 } 1394 args.in.h.opcode = FUSE_READLINK;
1459 req->in.h.opcode = FUSE_READLINK; 1395 args.in.h.nodeid = get_node_id(inode);
1460 req->in.h.nodeid = get_node_id(inode); 1396 args.out.argvar = 1;
1461 req->out.argvar = 1; 1397 args.out.numargs = 1;
1462 req->out.numargs = 1; 1398 args.out.args[0].size = PAGE_SIZE - 1;
1463 req->out.args[0].size = PAGE_SIZE - 1; 1399 args.out.args[0].value = link;
1464 req->out.args[0].value = link; 1400 ret = fuse_simple_request(fc, &args);
1465 fuse_request_send(fc, req); 1401 if (ret < 0) {
1466 if (req->out.h.error) {
1467 free_page((unsigned long) link); 1402 free_page((unsigned long) link);
1468 link = ERR_PTR(req->out.h.error); 1403 link = ERR_PTR(ret);
1469 } else 1404 } else {
1470 link[req->out.args[0].size] = '\0'; 1405 link[ret] = '\0';
1471 out: 1406 }
1472 fuse_put_request(fc, req);
1473 fuse_invalidate_atime(inode); 1407 fuse_invalidate_atime(inode);
1474 return link; 1408 return link;
1475} 1409}
@@ -1629,22 +1563,22 @@ void fuse_release_nowrite(struct inode *inode)
1629 spin_unlock(&fc->lock); 1563 spin_unlock(&fc->lock);
1630} 1564}
1631 1565
1632static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_req *req, 1566static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_args *args,
1633 struct inode *inode, 1567 struct inode *inode,
1634 struct fuse_setattr_in *inarg_p, 1568 struct fuse_setattr_in *inarg_p,
1635 struct fuse_attr_out *outarg_p) 1569 struct fuse_attr_out *outarg_p)
1636{ 1570{
1637 req->in.h.opcode = FUSE_SETATTR; 1571 args->in.h.opcode = FUSE_SETATTR;
1638 req->in.h.nodeid = get_node_id(inode); 1572 args->in.h.nodeid = get_node_id(inode);
1639 req->in.numargs = 1; 1573 args->in.numargs = 1;
1640 req->in.args[0].size = sizeof(*inarg_p); 1574 args->in.args[0].size = sizeof(*inarg_p);
1641 req->in.args[0].value = inarg_p; 1575 args->in.args[0].value = inarg_p;
1642 req->out.numargs = 1; 1576 args->out.numargs = 1;
1643 if (fc->minor < 9) 1577 if (fc->minor < 9)
1644 req->out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE; 1578 args->out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE;
1645 else 1579 else
1646 req->out.args[0].size = sizeof(*outarg_p); 1580 args->out.args[0].size = sizeof(*outarg_p);
1647 req->out.args[0].value = outarg_p; 1581 args->out.args[0].value = outarg_p;
1648} 1582}
1649 1583
1650/* 1584/*
@@ -1653,14 +1587,9 @@ static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_req *req,
1653int fuse_flush_times(struct inode *inode, struct fuse_file *ff) 1587int fuse_flush_times(struct inode *inode, struct fuse_file *ff)
1654{ 1588{
1655 struct fuse_conn *fc = get_fuse_conn(inode); 1589 struct fuse_conn *fc = get_fuse_conn(inode);
1656 struct fuse_req *req; 1590 FUSE_ARGS(args);
1657 struct fuse_setattr_in inarg; 1591 struct fuse_setattr_in inarg;
1658 struct fuse_attr_out outarg; 1592 struct fuse_attr_out outarg;
1659 int err;
1660
1661 req = fuse_get_req_nopages(fc);
1662 if (IS_ERR(req))
1663 return PTR_ERR(req);
1664 1593
1665 memset(&inarg, 0, sizeof(inarg)); 1594 memset(&inarg, 0, sizeof(inarg));
1666 memset(&outarg, 0, sizeof(outarg)); 1595 memset(&outarg, 0, sizeof(outarg));
@@ -1677,12 +1606,9 @@ int fuse_flush_times(struct inode *inode, struct fuse_file *ff)
1677 inarg.valid |= FATTR_FH; 1606 inarg.valid |= FATTR_FH;
1678 inarg.fh = ff->fh; 1607 inarg.fh = ff->fh;
1679 } 1608 }
1680 fuse_setattr_fill(fc, req, inode, &inarg, &outarg); 1609 fuse_setattr_fill(fc, &args, inode, &inarg, &outarg);
1681 fuse_request_send(fc, req);
1682 err = req->out.h.error;
1683 fuse_put_request(fc, req);
1684 1610
1685 return err; 1611 return fuse_simple_request(fc, &args);
1686} 1612}
1687 1613
1688/* 1614/*
@@ -1698,7 +1624,7 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,
1698{ 1624{
1699 struct fuse_conn *fc = get_fuse_conn(inode); 1625 struct fuse_conn *fc = get_fuse_conn(inode);
1700 struct fuse_inode *fi = get_fuse_inode(inode); 1626 struct fuse_inode *fi = get_fuse_inode(inode);
1701 struct fuse_req *req; 1627 FUSE_ARGS(args);
1702 struct fuse_setattr_in inarg; 1628 struct fuse_setattr_in inarg;
1703 struct fuse_attr_out outarg; 1629 struct fuse_attr_out outarg;
1704 bool is_truncate = false; 1630 bool is_truncate = false;
@@ -1723,10 +1649,6 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,
1723 if (attr->ia_valid & ATTR_SIZE) 1649 if (attr->ia_valid & ATTR_SIZE)
1724 is_truncate = true; 1650 is_truncate = true;
1725 1651
1726 req = fuse_get_req_nopages(fc);
1727 if (IS_ERR(req))
1728 return PTR_ERR(req);
1729
1730 if (is_truncate) { 1652 if (is_truncate) {
1731 fuse_set_nowrite(inode); 1653 fuse_set_nowrite(inode);
1732 set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); 1654 set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
@@ -1747,10 +1669,8 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,
1747 inarg.valid |= FATTR_LOCKOWNER; 1669 inarg.valid |= FATTR_LOCKOWNER;
1748 inarg.lock_owner = fuse_lock_owner_id(fc, current->files); 1670 inarg.lock_owner = fuse_lock_owner_id(fc, current->files);
1749 } 1671 }
1750 fuse_setattr_fill(fc, req, inode, &inarg, &outarg); 1672 fuse_setattr_fill(fc, &args, inode, &inarg, &outarg);
1751 fuse_request_send(fc, req); 1673 err = fuse_simple_request(fc, &args);
1752 err = req->out.h.error;
1753 fuse_put_request(fc, req);
1754 if (err) { 1674 if (err) {
1755 if (err == -EINTR) 1675 if (err == -EINTR)
1756 fuse_invalidate_attr(inode); 1676 fuse_invalidate_attr(inode);
@@ -1837,32 +1757,26 @@ static int fuse_setxattr(struct dentry *entry, const char *name,
1837{ 1757{
1838 struct inode *inode = entry->d_inode; 1758 struct inode *inode = entry->d_inode;
1839 struct fuse_conn *fc = get_fuse_conn(inode); 1759 struct fuse_conn *fc = get_fuse_conn(inode);
1840 struct fuse_req *req; 1760 FUSE_ARGS(args);
1841 struct fuse_setxattr_in inarg; 1761 struct fuse_setxattr_in inarg;
1842 int err; 1762 int err;
1843 1763
1844 if (fc->no_setxattr) 1764 if (fc->no_setxattr)
1845 return -EOPNOTSUPP; 1765 return -EOPNOTSUPP;
1846 1766
1847 req = fuse_get_req_nopages(fc);
1848 if (IS_ERR(req))
1849 return PTR_ERR(req);
1850
1851 memset(&inarg, 0, sizeof(inarg)); 1767 memset(&inarg, 0, sizeof(inarg));
1852 inarg.size = size; 1768 inarg.size = size;
1853 inarg.flags = flags; 1769 inarg.flags = flags;
1854 req->in.h.opcode = FUSE_SETXATTR; 1770 args.in.h.opcode = FUSE_SETXATTR;
1855 req->in.h.nodeid = get_node_id(inode); 1771 args.in.h.nodeid = get_node_id(inode);
1856 req->in.numargs = 3; 1772 args.in.numargs = 3;
1857 req->in.args[0].size = sizeof(inarg); 1773 args.in.args[0].size = sizeof(inarg);
1858 req->in.args[0].value = &inarg; 1774 args.in.args[0].value = &inarg;
1859 req->in.args[1].size = strlen(name) + 1; 1775 args.in.args[1].size = strlen(name) + 1;
1860 req->in.args[1].value = name; 1776 args.in.args[1].value = name;
1861 req->in.args[2].size = size; 1777 args.in.args[2].size = size;
1862 req->in.args[2].value = value; 1778 args.in.args[2].value = value;
1863 fuse_request_send(fc, req); 1779 err = fuse_simple_request(fc, &args);
1864 err = req->out.h.error;
1865 fuse_put_request(fc, req);
1866 if (err == -ENOSYS) { 1780 if (err == -ENOSYS) {
1867 fc->no_setxattr = 1; 1781 fc->no_setxattr = 1;
1868 err = -EOPNOTSUPP; 1782 err = -EOPNOTSUPP;
@@ -1879,7 +1793,7 @@ static ssize_t fuse_getxattr(struct dentry *entry, const char *name,
1879{ 1793{
1880 struct inode *inode = entry->d_inode; 1794 struct inode *inode = entry->d_inode;
1881 struct fuse_conn *fc = get_fuse_conn(inode); 1795 struct fuse_conn *fc = get_fuse_conn(inode);
1882 struct fuse_req *req; 1796 FUSE_ARGS(args);
1883 struct fuse_getxattr_in inarg; 1797 struct fuse_getxattr_in inarg;
1884 struct fuse_getxattr_out outarg; 1798 struct fuse_getxattr_out outarg;
1885 ssize_t ret; 1799 ssize_t ret;
@@ -1887,40 +1801,32 @@ static ssize_t fuse_getxattr(struct dentry *entry, const char *name,
1887 if (fc->no_getxattr) 1801 if (fc->no_getxattr)
1888 return -EOPNOTSUPP; 1802 return -EOPNOTSUPP;
1889 1803
1890 req = fuse_get_req_nopages(fc);
1891 if (IS_ERR(req))
1892 return PTR_ERR(req);
1893
1894 memset(&inarg, 0, sizeof(inarg)); 1804 memset(&inarg, 0, sizeof(inarg));
1895 inarg.size = size; 1805 inarg.size = size;
1896 req->in.h.opcode = FUSE_GETXATTR; 1806 args.in.h.opcode = FUSE_GETXATTR;
1897 req->in.h.nodeid = get_node_id(inode); 1807 args.in.h.nodeid = get_node_id(inode);
1898 req->in.numargs = 2; 1808 args.in.numargs = 2;
1899 req->in.args[0].size = sizeof(inarg); 1809 args.in.args[0].size = sizeof(inarg);
1900 req->in.args[0].value = &inarg; 1810 args.in.args[0].value = &inarg;
1901 req->in.args[1].size = strlen(name) + 1; 1811 args.in.args[1].size = strlen(name) + 1;
1902 req->in.args[1].value = name; 1812 args.in.args[1].value = name;
1903 /* This is really two different operations rolled into one */ 1813 /* This is really two different operations rolled into one */
1904 req->out.numargs = 1; 1814 args.out.numargs = 1;
1905 if (size) { 1815 if (size) {
1906 req->out.argvar = 1; 1816 args.out.argvar = 1;
1907 req->out.args[0].size = size; 1817 args.out.args[0].size = size;
1908 req->out.args[0].value = value; 1818 args.out.args[0].value = value;
1909 } else { 1819 } else {
1910 req->out.args[0].size = sizeof(outarg); 1820 args.out.args[0].size = sizeof(outarg);
1911 req->out.args[0].value = &outarg; 1821 args.out.args[0].value = &outarg;
1912 } 1822 }
1913 fuse_request_send(fc, req); 1823 ret = fuse_simple_request(fc, &args);
1914 ret = req->out.h.error; 1824 if (!ret && !size)
1915 if (!ret) 1825 ret = outarg.size;
1916 ret = size ? req->out.args[0].size : outarg.size; 1826 if (ret == -ENOSYS) {
1917 else { 1827 fc->no_getxattr = 1;
1918 if (ret == -ENOSYS) { 1828 ret = -EOPNOTSUPP;
1919 fc->no_getxattr = 1;
1920 ret = -EOPNOTSUPP;
1921 }
1922 } 1829 }
1923 fuse_put_request(fc, req);
1924 return ret; 1830 return ret;
1925} 1831}
1926 1832
@@ -1928,7 +1834,7 @@ static ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size)
1928{ 1834{
1929 struct inode *inode = entry->d_inode; 1835 struct inode *inode = entry->d_inode;
1930 struct fuse_conn *fc = get_fuse_conn(inode); 1836 struct fuse_conn *fc = get_fuse_conn(inode);
1931 struct fuse_req *req; 1837 FUSE_ARGS(args);
1932 struct fuse_getxattr_in inarg; 1838 struct fuse_getxattr_in inarg;
1933 struct fuse_getxattr_out outarg; 1839 struct fuse_getxattr_out outarg;
1934 ssize_t ret; 1840 ssize_t ret;
@@ -1939,38 +1845,30 @@ static ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size)
1939 if (fc->no_listxattr) 1845 if (fc->no_listxattr)
1940 return -EOPNOTSUPP; 1846 return -EOPNOTSUPP;
1941 1847
1942 req = fuse_get_req_nopages(fc);
1943 if (IS_ERR(req))
1944 return PTR_ERR(req);
1945
1946 memset(&inarg, 0, sizeof(inarg)); 1848 memset(&inarg, 0, sizeof(inarg));
1947 inarg.size = size; 1849 inarg.size = size;
1948 req->in.h.opcode = FUSE_LISTXATTR; 1850 args.in.h.opcode = FUSE_LISTXATTR;
1949 req->in.h.nodeid = get_node_id(inode); 1851 args.in.h.nodeid = get_node_id(inode);
1950 req->in.numargs = 1; 1852 args.in.numargs = 1;
1951 req->in.args[0].size = sizeof(inarg); 1853 args.in.args[0].size = sizeof(inarg);
1952 req->in.args[0].value = &inarg; 1854 args.in.args[0].value = &inarg;
1953 /* This is really two different operations rolled into one */ 1855 /* This is really two different operations rolled into one */
1954 req->out.numargs = 1; 1856 args.out.numargs = 1;
1955 if (size) { 1857 if (size) {
1956 req->out.argvar = 1; 1858 args.out.argvar = 1;
1957 req->out.args[0].size = size; 1859 args.out.args[0].size = size;
1958 req->out.args[0].value = list; 1860 args.out.args[0].value = list;
1959 } else { 1861 } else {
1960 req->out.args[0].size = sizeof(outarg); 1862 args.out.args[0].size = sizeof(outarg);
1961 req->out.args[0].value = &outarg; 1863 args.out.args[0].value = &outarg;
1962 } 1864 }
1963 fuse_request_send(fc, req); 1865 ret = fuse_simple_request(fc, &args);
1964 ret = req->out.h.error; 1866 if (!ret && !size)
1965 if (!ret) 1867 ret = outarg.size;
1966 ret = size ? req->out.args[0].size : outarg.size; 1868 if (ret == -ENOSYS) {
1967 else { 1869 fc->no_listxattr = 1;
1968 if (ret == -ENOSYS) { 1870 ret = -EOPNOTSUPP;
1969 fc->no_listxattr = 1;
1970 ret = -EOPNOTSUPP;
1971 }
1972 } 1871 }
1973 fuse_put_request(fc, req);
1974 return ret; 1872 return ret;
1975} 1873}
1976 1874
@@ -1978,24 +1876,18 @@ static int fuse_removexattr(struct dentry *entry, const char *name)
1978{ 1876{
1979 struct inode *inode = entry->d_inode; 1877 struct inode *inode = entry->d_inode;
1980 struct fuse_conn *fc = get_fuse_conn(inode); 1878 struct fuse_conn *fc = get_fuse_conn(inode);
1981 struct fuse_req *req; 1879 FUSE_ARGS(args);
1982 int err; 1880 int err;
1983 1881
1984 if (fc->no_removexattr) 1882 if (fc->no_removexattr)
1985 return -EOPNOTSUPP; 1883 return -EOPNOTSUPP;
1986 1884
1987 req = fuse_get_req_nopages(fc); 1885 args.in.h.opcode = FUSE_REMOVEXATTR;
1988 if (IS_ERR(req)) 1886 args.in.h.nodeid = get_node_id(inode);
1989 return PTR_ERR(req); 1887 args.in.numargs = 1;
1990 1888 args.in.args[0].size = strlen(name) + 1;
1991 req->in.h.opcode = FUSE_REMOVEXATTR; 1889 args.in.args[0].value = name;
1992 req->in.h.nodeid = get_node_id(inode); 1890 err = fuse_simple_request(fc, &args);
1993 req->in.numargs = 1;
1994 req->in.args[0].size = strlen(name) + 1;
1995 req->in.args[0].value = name;
1996 fuse_request_send(fc, req);
1997 err = req->out.h.error;
1998 fuse_put_request(fc, req);
1999 if (err == -ENOSYS) { 1891 if (err == -ENOSYS) {
2000 fc->no_removexattr = 1; 1892 fc->no_removexattr = 1;
2001 err = -EOPNOTSUPP; 1893 err = -EOPNOTSUPP;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index bf50259012ab..760b2c552197 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -24,30 +24,22 @@ static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
24 int opcode, struct fuse_open_out *outargp) 24 int opcode, struct fuse_open_out *outargp)
25{ 25{
26 struct fuse_open_in inarg; 26 struct fuse_open_in inarg;
27 struct fuse_req *req; 27 FUSE_ARGS(args);
28 int err;
29
30 req = fuse_get_req_nopages(fc);
31 if (IS_ERR(req))
32 return PTR_ERR(req);
33 28
34 memset(&inarg, 0, sizeof(inarg)); 29 memset(&inarg, 0, sizeof(inarg));
35 inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY); 30 inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY);
36 if (!fc->atomic_o_trunc) 31 if (!fc->atomic_o_trunc)
37 inarg.flags &= ~O_TRUNC; 32 inarg.flags &= ~O_TRUNC;
38 req->in.h.opcode = opcode; 33 args.in.h.opcode = opcode;
39 req->in.h.nodeid = nodeid; 34 args.in.h.nodeid = nodeid;
40 req->in.numargs = 1; 35 args.in.numargs = 1;
41 req->in.args[0].size = sizeof(inarg); 36 args.in.args[0].size = sizeof(inarg);
42 req->in.args[0].value = &inarg; 37 args.in.args[0].value = &inarg;
43 req->out.numargs = 1; 38 args.out.numargs = 1;
44 req->out.args[0].size = sizeof(*outargp); 39 args.out.args[0].size = sizeof(*outargp);
45 req->out.args[0].value = outargp; 40 args.out.args[0].value = outargp;
46 fuse_request_send(fc, req);
47 err = req->out.h.error;
48 fuse_put_request(fc, req);
49 41
50 return err; 42 return fuse_simple_request(fc, &args);
51} 43}
52 44
53struct fuse_file *fuse_file_alloc(struct fuse_conn *fc) 45struct fuse_file *fuse_file_alloc(struct fuse_conn *fc)
@@ -89,37 +81,9 @@ struct fuse_file *fuse_file_get(struct fuse_file *ff)
89 return ff; 81 return ff;
90} 82}
91 83
92static void fuse_release_async(struct work_struct *work)
93{
94 struct fuse_req *req;
95 struct fuse_conn *fc;
96 struct path path;
97
98 req = container_of(work, struct fuse_req, misc.release.work);
99 path = req->misc.release.path;
100 fc = get_fuse_conn(path.dentry->d_inode);
101
102 fuse_put_request(fc, req);
103 path_put(&path);
104}
105
106static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req) 84static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req)
107{ 85{
108 if (fc->destroy_req) { 86 iput(req->misc.release.inode);
109 /*
110 * If this is a fuseblk mount, then it's possible that
111 * releasing the path will result in releasing the
112 * super block and sending the DESTROY request. If
113 * the server is single threaded, this would hang.
114 * For this reason do the path_put() in a separate
115 * thread.
116 */
117 atomic_inc(&req->count);
118 INIT_WORK(&req->misc.release.work, fuse_release_async);
119 schedule_work(&req->misc.release.work);
120 } else {
121 path_put(&req->misc.release.path);
122 }
123} 87}
124 88
125static void fuse_file_put(struct fuse_file *ff, bool sync) 89static void fuse_file_put(struct fuse_file *ff, bool sync)
@@ -133,12 +97,12 @@ static void fuse_file_put(struct fuse_file *ff, bool sync)
133 * implement 'open' 97 * implement 'open'
134 */ 98 */
135 req->background = 0; 99 req->background = 0;
136 path_put(&req->misc.release.path); 100 iput(req->misc.release.inode);
137 fuse_put_request(ff->fc, req); 101 fuse_put_request(ff->fc, req);
138 } else if (sync) { 102 } else if (sync) {
139 req->background = 0; 103 req->background = 0;
140 fuse_request_send(ff->fc, req); 104 fuse_request_send(ff->fc, req);
141 path_put(&req->misc.release.path); 105 iput(req->misc.release.inode);
142 fuse_put_request(ff->fc, req); 106 fuse_put_request(ff->fc, req);
143 } else { 107 } else {
144 req->end = fuse_release_end; 108 req->end = fuse_release_end;
@@ -297,9 +261,8 @@ void fuse_release_common(struct file *file, int opcode)
297 inarg->lock_owner = fuse_lock_owner_id(ff->fc, 261 inarg->lock_owner = fuse_lock_owner_id(ff->fc,
298 (fl_owner_t) file); 262 (fl_owner_t) file);
299 } 263 }
300 /* Hold vfsmount and dentry until release is finished */ 264 /* Hold inode until release is finished */
301 path_get(&file->f_path); 265 req->misc.release.inode = igrab(file_inode(file));
302 req->misc.release.path = file->f_path;
303 266
304 /* 267 /*
305 * Normally this will send the RELEASE request, however if 268 * Normally this will send the RELEASE request, however if
@@ -480,7 +443,7 @@ int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
480 struct inode *inode = file->f_mapping->host; 443 struct inode *inode = file->f_mapping->host;
481 struct fuse_conn *fc = get_fuse_conn(inode); 444 struct fuse_conn *fc = get_fuse_conn(inode);
482 struct fuse_file *ff = file->private_data; 445 struct fuse_file *ff = file->private_data;
483 struct fuse_req *req; 446 FUSE_ARGS(args);
484 struct fuse_fsync_in inarg; 447 struct fuse_fsync_in inarg;
485 int err; 448 int err;
486 449
@@ -506,23 +469,15 @@ int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
506 if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir)) 469 if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir))
507 goto out; 470 goto out;
508 471
509 req = fuse_get_req_nopages(fc);
510 if (IS_ERR(req)) {
511 err = PTR_ERR(req);
512 goto out;
513 }
514
515 memset(&inarg, 0, sizeof(inarg)); 472 memset(&inarg, 0, sizeof(inarg));
516 inarg.fh = ff->fh; 473 inarg.fh = ff->fh;
517 inarg.fsync_flags = datasync ? 1 : 0; 474 inarg.fsync_flags = datasync ? 1 : 0;
518 req->in.h.opcode = isdir ? FUSE_FSYNCDIR : FUSE_FSYNC; 475 args.in.h.opcode = isdir ? FUSE_FSYNCDIR : FUSE_FSYNC;
519 req->in.h.nodeid = get_node_id(inode); 476 args.in.h.nodeid = get_node_id(inode);
520 req->in.numargs = 1; 477 args.in.numargs = 1;
521 req->in.args[0].size = sizeof(inarg); 478 args.in.args[0].size = sizeof(inarg);
522 req->in.args[0].value = &inarg; 479 args.in.args[0].value = &inarg;
523 fuse_request_send(fc, req); 480 err = fuse_simple_request(fc, &args);
524 err = req->out.h.error;
525 fuse_put_request(fc, req);
526 if (err == -ENOSYS) { 481 if (err == -ENOSYS) {
527 if (isdir) 482 if (isdir)
528 fc->no_fsyncdir = 1; 483 fc->no_fsyncdir = 1;
@@ -2156,49 +2111,44 @@ static int convert_fuse_file_lock(const struct fuse_file_lock *ffl,
2156 return 0; 2111 return 0;
2157} 2112}
2158 2113
2159static void fuse_lk_fill(struct fuse_req *req, struct file *file, 2114static void fuse_lk_fill(struct fuse_args *args, struct file *file,
2160 const struct file_lock *fl, int opcode, pid_t pid, 2115 const struct file_lock *fl, int opcode, pid_t pid,
2161 int flock) 2116 int flock, struct fuse_lk_in *inarg)
2162{ 2117{
2163 struct inode *inode = file_inode(file); 2118 struct inode *inode = file_inode(file);
2164 struct fuse_conn *fc = get_fuse_conn(inode); 2119 struct fuse_conn *fc = get_fuse_conn(inode);
2165 struct fuse_file *ff = file->private_data; 2120 struct fuse_file *ff = file->private_data;
2166 struct fuse_lk_in *arg = &req->misc.lk_in; 2121
2167 2122 memset(inarg, 0, sizeof(*inarg));
2168 arg->fh = ff->fh; 2123 inarg->fh = ff->fh;
2169 arg->owner = fuse_lock_owner_id(fc, fl->fl_owner); 2124 inarg->owner = fuse_lock_owner_id(fc, fl->fl_owner);
2170 arg->lk.start = fl->fl_start; 2125 inarg->lk.start = fl->fl_start;
2171 arg->lk.end = fl->fl_end; 2126 inarg->lk.end = fl->fl_end;
2172 arg->lk.type = fl->fl_type; 2127 inarg->lk.type = fl->fl_type;
2173 arg->lk.pid = pid; 2128 inarg->lk.pid = pid;
2174 if (flock) 2129 if (flock)
2175 arg->lk_flags |= FUSE_LK_FLOCK; 2130 inarg->lk_flags |= FUSE_LK_FLOCK;
2176 req->in.h.opcode = opcode; 2131 args->in.h.opcode = opcode;
2177 req->in.h.nodeid = get_node_id(inode); 2132 args->in.h.nodeid = get_node_id(inode);
2178 req->in.numargs = 1; 2133 args->in.numargs = 1;
2179 req->in.args[0].size = sizeof(*arg); 2134 args->in.args[0].size = sizeof(*inarg);
2180 req->in.args[0].value = arg; 2135 args->in.args[0].value = inarg;
2181} 2136}
2182 2137
2183static int fuse_getlk(struct file *file, struct file_lock *fl) 2138static int fuse_getlk(struct file *file, struct file_lock *fl)
2184{ 2139{
2185 struct inode *inode = file_inode(file); 2140 struct inode *inode = file_inode(file);
2186 struct fuse_conn *fc = get_fuse_conn(inode); 2141 struct fuse_conn *fc = get_fuse_conn(inode);
2187 struct fuse_req *req; 2142 FUSE_ARGS(args);
2143 struct fuse_lk_in inarg;
2188 struct fuse_lk_out outarg; 2144 struct fuse_lk_out outarg;
2189 int err; 2145 int err;
2190 2146
2191 req = fuse_get_req_nopages(fc); 2147 fuse_lk_fill(&args, file, fl, FUSE_GETLK, 0, 0, &inarg);
2192 if (IS_ERR(req)) 2148 args.out.numargs = 1;
2193 return PTR_ERR(req); 2149 args.out.args[0].size = sizeof(outarg);
2194 2150 args.out.args[0].value = &outarg;
2195 fuse_lk_fill(req, file, fl, FUSE_GETLK, 0, 0); 2151 err = fuse_simple_request(fc, &args);
2196 req->out.numargs = 1;
2197 req->out.args[0].size = sizeof(outarg);
2198 req->out.args[0].value = &outarg;
2199 fuse_request_send(fc, req);
2200 err = req->out.h.error;
2201 fuse_put_request(fc, req);
2202 if (!err) 2152 if (!err)
2203 err = convert_fuse_file_lock(&outarg.lk, fl); 2153 err = convert_fuse_file_lock(&outarg.lk, fl);
2204 2154
@@ -2209,7 +2159,8 @@ static int fuse_setlk(struct file *file, struct file_lock *fl, int flock)
2209{ 2159{
2210 struct inode *inode = file_inode(file); 2160 struct inode *inode = file_inode(file);
2211 struct fuse_conn *fc = get_fuse_conn(inode); 2161 struct fuse_conn *fc = get_fuse_conn(inode);
2212 struct fuse_req *req; 2162 FUSE_ARGS(args);
2163 struct fuse_lk_in inarg;
2213 int opcode = (fl->fl_flags & FL_SLEEP) ? FUSE_SETLKW : FUSE_SETLK; 2164 int opcode = (fl->fl_flags & FL_SLEEP) ? FUSE_SETLKW : FUSE_SETLK;
2214 pid_t pid = fl->fl_type != F_UNLCK ? current->tgid : 0; 2165 pid_t pid = fl->fl_type != F_UNLCK ? current->tgid : 0;
2215 int err; 2166 int err;
@@ -2223,17 +2174,13 @@ static int fuse_setlk(struct file *file, struct file_lock *fl, int flock)
2223 if (fl->fl_flags & FL_CLOSE) 2174 if (fl->fl_flags & FL_CLOSE)
2224 return 0; 2175 return 0;
2225 2176
2226 req = fuse_get_req_nopages(fc); 2177 fuse_lk_fill(&args, file, fl, opcode, pid, flock, &inarg);
2227 if (IS_ERR(req)) 2178 err = fuse_simple_request(fc, &args);
2228 return PTR_ERR(req);
2229 2179
2230 fuse_lk_fill(req, file, fl, opcode, pid, flock);
2231 fuse_request_send(fc, req);
2232 err = req->out.h.error;
2233 /* locking is restartable */ 2180 /* locking is restartable */
2234 if (err == -EINTR) 2181 if (err == -EINTR)
2235 err = -ERESTARTSYS; 2182 err = -ERESTARTSYS;
2236 fuse_put_request(fc, req); 2183
2237 return err; 2184 return err;
2238} 2185}
2239 2186
@@ -2283,7 +2230,7 @@ static sector_t fuse_bmap(struct address_space *mapping, sector_t block)
2283{ 2230{
2284 struct inode *inode = mapping->host; 2231 struct inode *inode = mapping->host;
2285 struct fuse_conn *fc = get_fuse_conn(inode); 2232 struct fuse_conn *fc = get_fuse_conn(inode);
2286 struct fuse_req *req; 2233 FUSE_ARGS(args);
2287 struct fuse_bmap_in inarg; 2234 struct fuse_bmap_in inarg;
2288 struct fuse_bmap_out outarg; 2235 struct fuse_bmap_out outarg;
2289 int err; 2236 int err;
@@ -2291,24 +2238,18 @@ static sector_t fuse_bmap(struct address_space *mapping, sector_t block)
2291 if (!inode->i_sb->s_bdev || fc->no_bmap) 2238 if (!inode->i_sb->s_bdev || fc->no_bmap)
2292 return 0; 2239 return 0;
2293 2240
2294 req = fuse_get_req_nopages(fc);
2295 if (IS_ERR(req))
2296 return 0;
2297
2298 memset(&inarg, 0, sizeof(inarg)); 2241 memset(&inarg, 0, sizeof(inarg));
2299 inarg.block = block; 2242 inarg.block = block;
2300 inarg.blocksize = inode->i_sb->s_blocksize; 2243 inarg.blocksize = inode->i_sb->s_blocksize;
2301 req->in.h.opcode = FUSE_BMAP; 2244 args.in.h.opcode = FUSE_BMAP;
2302 req->in.h.nodeid = get_node_id(inode); 2245 args.in.h.nodeid = get_node_id(inode);
2303 req->in.numargs = 1; 2246 args.in.numargs = 1;
2304 req->in.args[0].size = sizeof(inarg); 2247 args.in.args[0].size = sizeof(inarg);
2305 req->in.args[0].value = &inarg; 2248 args.in.args[0].value = &inarg;
2306 req->out.numargs = 1; 2249 args.out.numargs = 1;
2307 req->out.args[0].size = sizeof(outarg); 2250 args.out.args[0].size = sizeof(outarg);
2308 req->out.args[0].value = &outarg; 2251 args.out.args[0].value = &outarg;
2309 fuse_request_send(fc, req); 2252 err = fuse_simple_request(fc, &args);
2310 err = req->out.h.error;
2311 fuse_put_request(fc, req);
2312 if (err == -ENOSYS) 2253 if (err == -ENOSYS)
2313 fc->no_bmap = 1; 2254 fc->no_bmap = 1;
2314 2255
@@ -2776,7 +2717,7 @@ unsigned fuse_file_poll(struct file *file, poll_table *wait)
2776 struct fuse_conn *fc = ff->fc; 2717 struct fuse_conn *fc = ff->fc;
2777 struct fuse_poll_in inarg = { .fh = ff->fh, .kh = ff->kh }; 2718 struct fuse_poll_in inarg = { .fh = ff->fh, .kh = ff->kh };
2778 struct fuse_poll_out outarg; 2719 struct fuse_poll_out outarg;
2779 struct fuse_req *req; 2720 FUSE_ARGS(args);
2780 int err; 2721 int err;
2781 2722
2782 if (fc->no_poll) 2723 if (fc->no_poll)
@@ -2794,21 +2735,15 @@ unsigned fuse_file_poll(struct file *file, poll_table *wait)
2794 fuse_register_polled_file(fc, ff); 2735 fuse_register_polled_file(fc, ff);
2795 } 2736 }
2796 2737
2797 req = fuse_get_req_nopages(fc); 2738 args.in.h.opcode = FUSE_POLL;
2798 if (IS_ERR(req)) 2739 args.in.h.nodeid = ff->nodeid;
2799 return POLLERR; 2740 args.in.numargs = 1;
2800 2741 args.in.args[0].size = sizeof(inarg);
2801 req->in.h.opcode = FUSE_POLL; 2742 args.in.args[0].value = &inarg;
2802 req->in.h.nodeid = ff->nodeid; 2743 args.out.numargs = 1;
2803 req->in.numargs = 1; 2744 args.out.args[0].size = sizeof(outarg);
2804 req->in.args[0].size = sizeof(inarg); 2745 args.out.args[0].value = &outarg;
2805 req->in.args[0].value = &inarg; 2746 err = fuse_simple_request(fc, &args);
2806 req->out.numargs = 1;
2807 req->out.args[0].size = sizeof(outarg);
2808 req->out.args[0].value = &outarg;
2809 fuse_request_send(fc, req);
2810 err = req->out.h.error;
2811 fuse_put_request(fc, req);
2812 2747
2813 if (!err) 2748 if (!err)
2814 return outarg.revents; 2749 return outarg.revents;
@@ -2949,10 +2884,10 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
2949 loff_t length) 2884 loff_t length)
2950{ 2885{
2951 struct fuse_file *ff = file->private_data; 2886 struct fuse_file *ff = file->private_data;
2952 struct inode *inode = file->f_inode; 2887 struct inode *inode = file_inode(file);
2953 struct fuse_inode *fi = get_fuse_inode(inode); 2888 struct fuse_inode *fi = get_fuse_inode(inode);
2954 struct fuse_conn *fc = ff->fc; 2889 struct fuse_conn *fc = ff->fc;
2955 struct fuse_req *req; 2890 FUSE_ARGS(args);
2956 struct fuse_fallocate_in inarg = { 2891 struct fuse_fallocate_in inarg = {
2957 .fh = ff->fh, 2892 .fh = ff->fh,
2958 .offset = offset, 2893 .offset = offset,
@@ -2985,25 +2920,16 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
2985 if (!(mode & FALLOC_FL_KEEP_SIZE)) 2920 if (!(mode & FALLOC_FL_KEEP_SIZE))
2986 set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); 2921 set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
2987 2922
2988 req = fuse_get_req_nopages(fc); 2923 args.in.h.opcode = FUSE_FALLOCATE;
2989 if (IS_ERR(req)) { 2924 args.in.h.nodeid = ff->nodeid;
2990 err = PTR_ERR(req); 2925 args.in.numargs = 1;
2991 goto out; 2926 args.in.args[0].size = sizeof(inarg);
2992 } 2927 args.in.args[0].value = &inarg;
2993 2928 err = fuse_simple_request(fc, &args);
2994 req->in.h.opcode = FUSE_FALLOCATE;
2995 req->in.h.nodeid = ff->nodeid;
2996 req->in.numargs = 1;
2997 req->in.args[0].size = sizeof(inarg);
2998 req->in.args[0].value = &inarg;
2999 fuse_request_send(fc, req);
3000 err = req->out.h.error;
3001 if (err == -ENOSYS) { 2929 if (err == -ENOSYS) {
3002 fc->no_fallocate = 1; 2930 fc->no_fallocate = 1;
3003 err = -EOPNOTSUPP; 2931 err = -EOPNOTSUPP;
3004 } 2932 }
3005 fuse_put_request(fc, req);
3006
3007 if (err) 2933 if (err)
3008 goto out; 2934 goto out;
3009 2935
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index e8e47a6ab518..e0fc6725d1d0 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -213,7 +213,7 @@ struct fuse_out {
213 unsigned numargs; 213 unsigned numargs;
214 214
215 /** Array of arguments */ 215 /** Array of arguments */
216 struct fuse_arg args[3]; 216 struct fuse_arg args[2];
217}; 217};
218 218
219/** FUSE page descriptor */ 219/** FUSE page descriptor */
@@ -222,6 +222,25 @@ struct fuse_page_desc {
222 unsigned int offset; 222 unsigned int offset;
223}; 223};
224 224
225struct fuse_args {
226 struct {
227 struct {
228 uint32_t opcode;
229 uint64_t nodeid;
230 } h;
231 unsigned numargs;
232 struct fuse_in_arg args[3];
233
234 } in;
235 struct {
236 unsigned argvar:1;
237 unsigned numargs;
238 struct fuse_arg args[2];
239 } out;
240};
241
242#define FUSE_ARGS(args) struct fuse_args args = {}
243
225/** The request state */ 244/** The request state */
226enum fuse_req_state { 245enum fuse_req_state {
227 FUSE_REQ_INIT = 0, 246 FUSE_REQ_INIT = 0,
@@ -305,11 +324,8 @@ struct fuse_req {
305 /** Data for asynchronous requests */ 324 /** Data for asynchronous requests */
306 union { 325 union {
307 struct { 326 struct {
308 union { 327 struct fuse_release_in in;
309 struct fuse_release_in in; 328 struct inode *inode;
310 struct work_struct work;
311 };
312 struct path path;
313 } release; 329 } release;
314 struct fuse_init_in init_in; 330 struct fuse_init_in init_in;
315 struct fuse_init_out init_out; 331 struct fuse_init_out init_out;
@@ -324,7 +340,6 @@ struct fuse_req {
324 struct fuse_req *next; 340 struct fuse_req *next;
325 } write; 341 } write;
326 struct fuse_notify_retrieve_in retrieve_in; 342 struct fuse_notify_retrieve_in retrieve_in;
327 struct fuse_lk_in lk_in;
328 } misc; 343 } misc;
329 344
330 /** page vector */ 345 /** page vector */
@@ -754,15 +769,6 @@ struct fuse_req *fuse_get_req_for_background(struct fuse_conn *fc,
754void __fuse_get_request(struct fuse_req *req); 769void __fuse_get_request(struct fuse_req *req);
755 770
756/** 771/**
757 * Get a request, may fail with -ENOMEM,
758 * useful for callers who doesn't use req->pages[]
759 */
760static inline struct fuse_req *fuse_get_req_nopages(struct fuse_conn *fc)
761{
762 return fuse_get_req(fc, 0);
763}
764
765/**
766 * Gets a requests for a file operation, always succeeds 772 * Gets a requests for a file operation, always succeeds
767 */ 773 */
768struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc, 774struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc,
@@ -780,6 +786,11 @@ void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req);
780void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req); 786void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req);
781 787
782/** 788/**
789 * Simple request sending that does request allocation and freeing
790 */
791ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args);
792
793/**
783 * Send a request in the background 794 * Send a request in the background
784 */ 795 */
785void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req); 796void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req);
@@ -804,8 +815,6 @@ void fuse_invalidate_atime(struct inode *inode);
804 */ 815 */
805struct fuse_conn *fuse_conn_get(struct fuse_conn *fc); 816struct fuse_conn *fuse_conn_get(struct fuse_conn *fc);
806 817
807void fuse_conn_kill(struct fuse_conn *fc);
808
809/** 818/**
810 * Initialize fuse_conn 819 * Initialize fuse_conn
811 */ 820 */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 03246cd9d47a..6749109f255d 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -376,28 +376,13 @@ static void fuse_bdi_destroy(struct fuse_conn *fc)
376 bdi_destroy(&fc->bdi); 376 bdi_destroy(&fc->bdi);
377} 377}
378 378
379void fuse_conn_kill(struct fuse_conn *fc)
380{
381 spin_lock(&fc->lock);
382 fc->connected = 0;
383 fc->blocked = 0;
384 fc->initialized = 1;
385 spin_unlock(&fc->lock);
386 /* Flush all readers on this fs */
387 kill_fasync(&fc->fasync, SIGIO, POLL_IN);
388 wake_up_all(&fc->waitq);
389 wake_up_all(&fc->blocked_waitq);
390 wake_up_all(&fc->reserved_req_waitq);
391}
392EXPORT_SYMBOL_GPL(fuse_conn_kill);
393
394static void fuse_put_super(struct super_block *sb) 379static void fuse_put_super(struct super_block *sb)
395{ 380{
396 struct fuse_conn *fc = get_fuse_conn_super(sb); 381 struct fuse_conn *fc = get_fuse_conn_super(sb);
397 382
398 fuse_send_destroy(fc); 383 fuse_send_destroy(fc);
399 384
400 fuse_conn_kill(fc); 385 fuse_abort_conn(fc);
401 mutex_lock(&fuse_mutex); 386 mutex_lock(&fuse_mutex);
402 list_del(&fc->entry); 387 list_del(&fc->entry);
403 fuse_ctl_remove_conn(fc); 388 fuse_ctl_remove_conn(fc);
@@ -425,7 +410,7 @@ static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf)
425{ 410{
426 struct super_block *sb = dentry->d_sb; 411 struct super_block *sb = dentry->d_sb;
427 struct fuse_conn *fc = get_fuse_conn_super(sb); 412 struct fuse_conn *fc = get_fuse_conn_super(sb);
428 struct fuse_req *req; 413 FUSE_ARGS(args);
429 struct fuse_statfs_out outarg; 414 struct fuse_statfs_out outarg;
430 int err; 415 int err;
431 416
@@ -434,23 +419,17 @@ static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf)
434 return 0; 419 return 0;
435 } 420 }
436 421
437 req = fuse_get_req_nopages(fc);
438 if (IS_ERR(req))
439 return PTR_ERR(req);
440
441 memset(&outarg, 0, sizeof(outarg)); 422 memset(&outarg, 0, sizeof(outarg));
442 req->in.numargs = 0; 423 args.in.numargs = 0;
443 req->in.h.opcode = FUSE_STATFS; 424 args.in.h.opcode = FUSE_STATFS;
444 req->in.h.nodeid = get_node_id(dentry->d_inode); 425 args.in.h.nodeid = get_node_id(dentry->d_inode);
445 req->out.numargs = 1; 426 args.out.numargs = 1;
446 req->out.args[0].size = 427 args.out.args[0].size =
447 fc->minor < 4 ? FUSE_COMPAT_STATFS_SIZE : sizeof(outarg); 428 fc->minor < 4 ? FUSE_COMPAT_STATFS_SIZE : sizeof(outarg);
448 req->out.args[0].value = &outarg; 429 args.out.args[0].value = &outarg;
449 fuse_request_send(fc, req); 430 err = fuse_simple_request(fc, &args);
450 err = req->out.h.error;
451 if (!err) 431 if (!err)
452 convert_fuse_statfs(buf, &outarg.st); 432 convert_fuse_statfs(buf, &outarg.st);
453 fuse_put_request(fc, req);
454 return err; 433 return err;
455} 434}
456 435
diff --git a/fs/hfsplus/catalog.c b/fs/hfsplus/catalog.c
index 32602c667b4a..7892e6fddb66 100644
--- a/fs/hfsplus/catalog.c
+++ b/fs/hfsplus/catalog.c
@@ -38,21 +38,30 @@ int hfsplus_cat_bin_cmp_key(const hfsplus_btree_key *k1,
38 return hfsplus_strcmp(&k1->cat.name, &k2->cat.name); 38 return hfsplus_strcmp(&k1->cat.name, &k2->cat.name);
39} 39}
40 40
41void hfsplus_cat_build_key(struct super_block *sb, hfsplus_btree_key *key, 41/* Generates key for catalog file/folders record. */
42 u32 parent, struct qstr *str) 42int hfsplus_cat_build_key(struct super_block *sb,
43 hfsplus_btree_key *key, u32 parent, struct qstr *str)
43{ 44{
44 int len; 45 int len, err;
45 46
46 key->cat.parent = cpu_to_be32(parent); 47 key->cat.parent = cpu_to_be32(parent);
47 if (str) { 48 err = hfsplus_asc2uni(sb, &key->cat.name, HFSPLUS_MAX_STRLEN,
48 hfsplus_asc2uni(sb, &key->cat.name, HFSPLUS_MAX_STRLEN, 49 str->name, str->len);
49 str->name, str->len); 50 if (unlikely(err < 0))
50 len = be16_to_cpu(key->cat.name.length); 51 return err;
51 } else { 52
52 key->cat.name.length = 0; 53 len = be16_to_cpu(key->cat.name.length);
53 len = 0;
54 }
55 key->key_len = cpu_to_be16(6 + 2 * len); 54 key->key_len = cpu_to_be16(6 + 2 * len);
55 return 0;
56}
57
58/* Generates key for catalog thread record. */
59void hfsplus_cat_build_key_with_cnid(struct super_block *sb,
60 hfsplus_btree_key *key, u32 parent)
61{
62 key->cat.parent = cpu_to_be32(parent);
63 key->cat.name.length = 0;
64 key->key_len = cpu_to_be16(6);
56} 65}
57 66
58static void hfsplus_cat_build_key_uni(hfsplus_btree_key *key, u32 parent, 67static void hfsplus_cat_build_key_uni(hfsplus_btree_key *key, u32 parent,
@@ -167,11 +176,16 @@ static int hfsplus_fill_cat_thread(struct super_block *sb,
167 hfsplus_cat_entry *entry, int type, 176 hfsplus_cat_entry *entry, int type,
168 u32 parentid, struct qstr *str) 177 u32 parentid, struct qstr *str)
169{ 178{
179 int err;
180
170 entry->type = cpu_to_be16(type); 181 entry->type = cpu_to_be16(type);
171 entry->thread.reserved = 0; 182 entry->thread.reserved = 0;
172 entry->thread.parentID = cpu_to_be32(parentid); 183 entry->thread.parentID = cpu_to_be32(parentid);
173 hfsplus_asc2uni(sb, &entry->thread.nodeName, HFSPLUS_MAX_STRLEN, 184 err = hfsplus_asc2uni(sb, &entry->thread.nodeName, HFSPLUS_MAX_STRLEN,
174 str->name, str->len); 185 str->name, str->len);
186 if (unlikely(err < 0))
187 return err;
188
175 return 10 + be16_to_cpu(entry->thread.nodeName.length) * 2; 189 return 10 + be16_to_cpu(entry->thread.nodeName.length) * 2;
176} 190}
177 191
@@ -183,7 +197,7 @@ int hfsplus_find_cat(struct super_block *sb, u32 cnid,
183 int err; 197 int err;
184 u16 type; 198 u16 type;
185 199
186 hfsplus_cat_build_key(sb, fd->search_key, cnid, NULL); 200 hfsplus_cat_build_key_with_cnid(sb, fd->search_key, cnid);
187 err = hfs_brec_read(fd, &tmp, sizeof(hfsplus_cat_entry)); 201 err = hfs_brec_read(fd, &tmp, sizeof(hfsplus_cat_entry));
188 if (err) 202 if (err)
189 return err; 203 return err;
@@ -250,11 +264,16 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir,
250 if (err) 264 if (err)
251 return err; 265 return err;
252 266
253 hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL); 267 hfsplus_cat_build_key_with_cnid(sb, fd.search_key, cnid);
254 entry_size = hfsplus_fill_cat_thread(sb, &entry, 268 entry_size = hfsplus_fill_cat_thread(sb, &entry,
255 S_ISDIR(inode->i_mode) ? 269 S_ISDIR(inode->i_mode) ?
256 HFSPLUS_FOLDER_THREAD : HFSPLUS_FILE_THREAD, 270 HFSPLUS_FOLDER_THREAD : HFSPLUS_FILE_THREAD,
257 dir->i_ino, str); 271 dir->i_ino, str);
272 if (unlikely(entry_size < 0)) {
273 err = entry_size;
274 goto err2;
275 }
276
258 err = hfs_brec_find(&fd, hfs_find_rec_by_key); 277 err = hfs_brec_find(&fd, hfs_find_rec_by_key);
259 if (err != -ENOENT) { 278 if (err != -ENOENT) {
260 if (!err) 279 if (!err)
@@ -265,7 +284,10 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir,
265 if (err) 284 if (err)
266 goto err2; 285 goto err2;
267 286
268 hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str); 287 err = hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str);
288 if (unlikely(err))
289 goto err1;
290
269 entry_size = hfsplus_cat_build_record(&entry, cnid, inode); 291 entry_size = hfsplus_cat_build_record(&entry, cnid, inode);
270 err = hfs_brec_find(&fd, hfs_find_rec_by_key); 292 err = hfs_brec_find(&fd, hfs_find_rec_by_key);
271 if (err != -ENOENT) { 293 if (err != -ENOENT) {
@@ -288,7 +310,7 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir,
288 return 0; 310 return 0;
289 311
290err1: 312err1:
291 hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL); 313 hfsplus_cat_build_key_with_cnid(sb, fd.search_key, cnid);
292 if (!hfs_brec_find(&fd, hfs_find_rec_by_key)) 314 if (!hfs_brec_find(&fd, hfs_find_rec_by_key))
293 hfs_brec_remove(&fd); 315 hfs_brec_remove(&fd);
294err2: 316err2:
@@ -313,7 +335,7 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str)
313 if (!str) { 335 if (!str) {
314 int len; 336 int len;
315 337
316 hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL); 338 hfsplus_cat_build_key_with_cnid(sb, fd.search_key, cnid);
317 err = hfs_brec_find(&fd, hfs_find_rec_by_key); 339 err = hfs_brec_find(&fd, hfs_find_rec_by_key);
318 if (err) 340 if (err)
319 goto out; 341 goto out;
@@ -329,7 +351,9 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str)
329 off + 2, len); 351 off + 2, len);
330 fd.search_key->key_len = cpu_to_be16(6 + len); 352 fd.search_key->key_len = cpu_to_be16(6 + len);
331 } else 353 } else
332 hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str); 354 err = hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str);
355 if (unlikely(err))
356 goto out;
333 357
334 err = hfs_brec_find(&fd, hfs_find_rec_by_key); 358 err = hfs_brec_find(&fd, hfs_find_rec_by_key);
335 if (err) 359 if (err)
@@ -360,7 +384,7 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str)
360 if (err) 384 if (err)
361 goto out; 385 goto out;
362 386
363 hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL); 387 hfsplus_cat_build_key_with_cnid(sb, fd.search_key, cnid);
364 err = hfs_brec_find(&fd, hfs_find_rec_by_key); 388 err = hfs_brec_find(&fd, hfs_find_rec_by_key);
365 if (err) 389 if (err)
366 goto out; 390 goto out;
@@ -405,7 +429,11 @@ int hfsplus_rename_cat(u32 cnid,
405 dst_fd = src_fd; 429 dst_fd = src_fd;
406 430
407 /* find the old dir entry and read the data */ 431 /* find the old dir entry and read the data */
408 hfsplus_cat_build_key(sb, src_fd.search_key, src_dir->i_ino, src_name); 432 err = hfsplus_cat_build_key(sb, src_fd.search_key,
433 src_dir->i_ino, src_name);
434 if (unlikely(err))
435 goto out;
436
409 err = hfs_brec_find(&src_fd, hfs_find_rec_by_key); 437 err = hfs_brec_find(&src_fd, hfs_find_rec_by_key);
410 if (err) 438 if (err)
411 goto out; 439 goto out;
@@ -419,7 +447,11 @@ int hfsplus_rename_cat(u32 cnid,
419 type = be16_to_cpu(entry.type); 447 type = be16_to_cpu(entry.type);
420 448
421 /* create new dir entry with the data from the old entry */ 449 /* create new dir entry with the data from the old entry */
422 hfsplus_cat_build_key(sb, dst_fd.search_key, dst_dir->i_ino, dst_name); 450 err = hfsplus_cat_build_key(sb, dst_fd.search_key,
451 dst_dir->i_ino, dst_name);
452 if (unlikely(err))
453 goto out;
454
423 err = hfs_brec_find(&dst_fd, hfs_find_rec_by_key); 455 err = hfs_brec_find(&dst_fd, hfs_find_rec_by_key);
424 if (err != -ENOENT) { 456 if (err != -ENOENT) {
425 if (!err) 457 if (!err)
@@ -436,7 +468,11 @@ int hfsplus_rename_cat(u32 cnid,
436 dst_dir->i_mtime = dst_dir->i_ctime = CURRENT_TIME_SEC; 468 dst_dir->i_mtime = dst_dir->i_ctime = CURRENT_TIME_SEC;
437 469
438 /* finally remove the old entry */ 470 /* finally remove the old entry */
439 hfsplus_cat_build_key(sb, src_fd.search_key, src_dir->i_ino, src_name); 471 err = hfsplus_cat_build_key(sb, src_fd.search_key,
472 src_dir->i_ino, src_name);
473 if (unlikely(err))
474 goto out;
475
440 err = hfs_brec_find(&src_fd, hfs_find_rec_by_key); 476 err = hfs_brec_find(&src_fd, hfs_find_rec_by_key);
441 if (err) 477 if (err)
442 goto out; 478 goto out;
@@ -449,7 +485,7 @@ int hfsplus_rename_cat(u32 cnid,
449 src_dir->i_mtime = src_dir->i_ctime = CURRENT_TIME_SEC; 485 src_dir->i_mtime = src_dir->i_ctime = CURRENT_TIME_SEC;
450 486
451 /* remove old thread entry */ 487 /* remove old thread entry */
452 hfsplus_cat_build_key(sb, src_fd.search_key, cnid, NULL); 488 hfsplus_cat_build_key_with_cnid(sb, src_fd.search_key, cnid);
453 err = hfs_brec_find(&src_fd, hfs_find_rec_by_key); 489 err = hfs_brec_find(&src_fd, hfs_find_rec_by_key);
454 if (err) 490 if (err)
455 goto out; 491 goto out;
@@ -459,9 +495,14 @@ int hfsplus_rename_cat(u32 cnid,
459 goto out; 495 goto out;
460 496
461 /* create new thread entry */ 497 /* create new thread entry */
462 hfsplus_cat_build_key(sb, dst_fd.search_key, cnid, NULL); 498 hfsplus_cat_build_key_with_cnid(sb, dst_fd.search_key, cnid);
463 entry_size = hfsplus_fill_cat_thread(sb, &entry, type, 499 entry_size = hfsplus_fill_cat_thread(sb, &entry, type,
464 dst_dir->i_ino, dst_name); 500 dst_dir->i_ino, dst_name);
501 if (unlikely(entry_size < 0)) {
502 err = entry_size;
503 goto out;
504 }
505
465 err = hfs_brec_find(&dst_fd, hfs_find_rec_by_key); 506 err = hfs_brec_find(&dst_fd, hfs_find_rec_by_key);
466 if (err != -ENOENT) { 507 if (err != -ENOENT) {
467 if (!err) 508 if (!err)
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index 610a3260bef1..435bea231cc6 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -44,7 +44,10 @@ static struct dentry *hfsplus_lookup(struct inode *dir, struct dentry *dentry,
44 err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); 44 err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd);
45 if (err) 45 if (err)
46 return ERR_PTR(err); 46 return ERR_PTR(err);
47 hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, &dentry->d_name); 47 err = hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino,
48 &dentry->d_name);
49 if (unlikely(err < 0))
50 goto fail;
48again: 51again:
49 err = hfs_brec_read(&fd, &entry, sizeof(entry)); 52 err = hfs_brec_read(&fd, &entry, sizeof(entry));
50 if (err) { 53 if (err) {
@@ -97,9 +100,11 @@ again:
97 be32_to_cpu(entry.file.permissions.dev); 100 be32_to_cpu(entry.file.permissions.dev);
98 str.len = sprintf(name, "iNode%d", linkid); 101 str.len = sprintf(name, "iNode%d", linkid);
99 str.name = name; 102 str.name = name;
100 hfsplus_cat_build_key(sb, fd.search_key, 103 err = hfsplus_cat_build_key(sb, fd.search_key,
101 HFSPLUS_SB(sb)->hidden_dir->i_ino, 104 HFSPLUS_SB(sb)->hidden_dir->i_ino,
102 &str); 105 &str);
106 if (unlikely(err < 0))
107 goto fail;
103 goto again; 108 goto again;
104 } 109 }
105 } else if (!dentry->d_fsdata) 110 } else if (!dentry->d_fsdata)
@@ -145,7 +150,7 @@ static int hfsplus_readdir(struct file *file, struct dir_context *ctx)
145 err = -ENOMEM; 150 err = -ENOMEM;
146 goto out; 151 goto out;
147 } 152 }
148 hfsplus_cat_build_key(sb, fd.search_key, inode->i_ino, NULL); 153 hfsplus_cat_build_key_with_cnid(sb, fd.search_key, inode->i_ino);
149 err = hfs_brec_find(&fd, hfs_find_rec_by_key); 154 err = hfs_brec_find(&fd, hfs_find_rec_by_key);
150 if (err) 155 if (err)
151 goto out; 156 goto out;
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index eb5e059f481a..b0441d65fa54 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -443,8 +443,10 @@ int hfsplus_cat_case_cmp_key(const hfsplus_btree_key *k1,
443 const hfsplus_btree_key *k2); 443 const hfsplus_btree_key *k2);
444int hfsplus_cat_bin_cmp_key(const hfsplus_btree_key *k1, 444int hfsplus_cat_bin_cmp_key(const hfsplus_btree_key *k1,
445 const hfsplus_btree_key *k2); 445 const hfsplus_btree_key *k2);
446void hfsplus_cat_build_key(struct super_block *sb, hfsplus_btree_key *key, 446int hfsplus_cat_build_key(struct super_block *sb, hfsplus_btree_key *key,
447 u32 parent, struct qstr *str); 447 u32 parent, struct qstr *str);
448void hfsplus_cat_build_key_with_cnid(struct super_block *sb,
449 hfsplus_btree_key *key, u32 parent);
448void hfsplus_cat_set_perms(struct inode *inode, struct hfsplus_perm *perms); 450void hfsplus_cat_set_perms(struct inode *inode, struct hfsplus_perm *perms);
449int hfsplus_find_cat(struct super_block *sb, u32 cnid, 451int hfsplus_find_cat(struct super_block *sb, u32 cnid,
450 struct hfs_find_data *fd); 452 struct hfs_find_data *fd);
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 4cf2024b87da..593af2fdcc2d 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -515,7 +515,9 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
515 err = hfs_find_init(sbi->cat_tree, &fd); 515 err = hfs_find_init(sbi->cat_tree, &fd);
516 if (err) 516 if (err)
517 goto out_put_root; 517 goto out_put_root;
518 hfsplus_cat_build_key(sb, fd.search_key, HFSPLUS_ROOT_CNID, &str); 518 err = hfsplus_cat_build_key(sb, fd.search_key, HFSPLUS_ROOT_CNID, &str);
519 if (unlikely(err < 0))
520 goto out_put_root;
519 if (!hfs_brec_read(&fd, &entry, sizeof(entry))) { 521 if (!hfs_brec_read(&fd, &entry, sizeof(entry))) {
520 hfs_find_exit(&fd); 522 hfs_find_exit(&fd);
521 if (entry.type != cpu_to_be16(HFSPLUS_FOLDER)) 523 if (entry.type != cpu_to_be16(HFSPLUS_FOLDER))
diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c
index 386303dca382..dddbde4f56f4 100644
--- a/fs/jffs2/readinode.c
+++ b/fs/jffs2/readinode.c
@@ -224,7 +224,7 @@ static int jffs2_add_tn_to_tree(struct jffs2_sb_info *c,
224 224
225 dbg_readinode("insert fragment %#04x-%#04x, ver %u at %08x\n", tn->fn->ofs, fn_end, tn->version, ref_offset(tn->fn->raw)); 225 dbg_readinode("insert fragment %#04x-%#04x, ver %u at %08x\n", tn->fn->ofs, fn_end, tn->version, ref_offset(tn->fn->raw));
226 226
227 /* If a node has zero dsize, we only have to keep if it if it might be the 227 /* If a node has zero dsize, we only have to keep it if it might be the
228 node with highest version -- i.e. the one which will end up as f->metadata. 228 node with highest version -- i.e. the one which will end up as f->metadata.
229 Note that such nodes won't be REF_UNCHECKED since there are no data to 229 Note that such nodes won't be REF_UNCHECKED since there are no data to
230 check anyway. */ 230 check anyway. */
diff --git a/fs/jffs2/summary.c b/fs/jffs2/summary.c
index c522d098bb4f..bc5385471a6e 100644
--- a/fs/jffs2/summary.c
+++ b/fs/jffs2/summary.c
@@ -844,6 +844,7 @@ static int jffs2_sum_write_data(struct jffs2_sb_info *c, struct jffs2_eraseblock
844/* Write out summary information - called from jffs2_do_reserve_space */ 844/* Write out summary information - called from jffs2_do_reserve_space */
845 845
846int jffs2_sum_write_sumnode(struct jffs2_sb_info *c) 846int jffs2_sum_write_sumnode(struct jffs2_sb_info *c)
847 __must_hold(&c->erase_completion_block)
847{ 848{
848 int datasize, infosize, padsize; 849 int datasize, infosize, padsize;
849 struct jffs2_eraseblock *jeb; 850 struct jffs2_eraseblock *jeb;
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index 697390ea47b8..ddc9f9612f16 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -448,27 +448,6 @@ static struct mempolicy *kernfs_vma_get_policy(struct vm_area_struct *vma,
448 return pol; 448 return pol;
449} 449}
450 450
451static int kernfs_vma_migrate(struct vm_area_struct *vma,
452 const nodemask_t *from, const nodemask_t *to,
453 unsigned long flags)
454{
455 struct file *file = vma->vm_file;
456 struct kernfs_open_file *of = kernfs_of(file);
457 int ret;
458
459 if (!of->vm_ops)
460 return 0;
461
462 if (!kernfs_get_active(of->kn))
463 return 0;
464
465 ret = 0;
466 if (of->vm_ops->migrate)
467 ret = of->vm_ops->migrate(vma, from, to, flags);
468
469 kernfs_put_active(of->kn);
470 return ret;
471}
472#endif 451#endif
473 452
474static const struct vm_operations_struct kernfs_vm_ops = { 453static const struct vm_operations_struct kernfs_vm_ops = {
@@ -479,7 +458,6 @@ static const struct vm_operations_struct kernfs_vm_ops = {
479#ifdef CONFIG_NUMA 458#ifdef CONFIG_NUMA
480 .set_policy = kernfs_vma_set_policy, 459 .set_policy = kernfs_vma_set_policy,
481 .get_policy = kernfs_vma_get_policy, 460 .get_policy = kernfs_vma_get_policy,
482 .migrate = kernfs_vma_migrate,
483#endif 461#endif
484}; 462};
485 463
diff --git a/fs/namespace.c b/fs/namespace.c
index 30df6e7dd807..cd1e9681a0cf 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -963,7 +963,8 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
963 } 963 }
964 964
965 /* Don't allow unprivileged users to reveal what is under a mount */ 965 /* Don't allow unprivileged users to reveal what is under a mount */
966 if ((flag & CL_UNPRIVILEGED) && list_empty(&old->mnt_expire)) 966 if ((flag & CL_UNPRIVILEGED) &&
967 (!(flag & CL_EXPIRE) || list_empty(&old->mnt_expire)))
967 mnt->mnt.mnt_flags |= MNT_LOCKED; 968 mnt->mnt.mnt_flags |= MNT_LOCKED;
968 969
969 atomic_inc(&sb->s_active); 970 atomic_inc(&sb->s_active);
@@ -1369,6 +1370,8 @@ void umount_tree(struct mount *mnt, int how)
1369 } 1370 }
1370 if (last) { 1371 if (last) {
1371 last->mnt_hash.next = unmounted.first; 1372 last->mnt_hash.next = unmounted.first;
1373 if (unmounted.first)
1374 unmounted.first->pprev = &last->mnt_hash.next;
1372 unmounted.first = tmp_list.first; 1375 unmounted.first = tmp_list.first;
1373 unmounted.first->pprev = &unmounted.first; 1376 unmounted.first->pprev = &unmounted.first;
1374 } 1377 }
@@ -1544,6 +1547,9 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
1544 goto dput_and_out; 1547 goto dput_and_out;
1545 if (mnt->mnt.mnt_flags & MNT_LOCKED) 1548 if (mnt->mnt.mnt_flags & MNT_LOCKED)
1546 goto dput_and_out; 1549 goto dput_and_out;
1550 retval = -EPERM;
1551 if (flags & MNT_FORCE && !capable(CAP_SYS_ADMIN))
1552 goto dput_and_out;
1547 1553
1548 retval = do_umount(mnt, flags); 1554 retval = do_umount(mnt, flags);
1549dput_and_out: 1555dput_and_out:
@@ -1606,7 +1612,6 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
1606 if (IS_ERR(q)) 1612 if (IS_ERR(q))
1607 return q; 1613 return q;
1608 1614
1609 q->mnt.mnt_flags &= ~MNT_LOCKED;
1610 q->mnt_mountpoint = mnt->mnt_mountpoint; 1615 q->mnt_mountpoint = mnt->mnt_mountpoint;
1611 1616
1612 p = mnt; 1617 p = mnt;
@@ -2097,7 +2102,13 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
2097 } 2102 }
2098 if ((mnt->mnt.mnt_flags & MNT_LOCK_NODEV) && 2103 if ((mnt->mnt.mnt_flags & MNT_LOCK_NODEV) &&
2099 !(mnt_flags & MNT_NODEV)) { 2104 !(mnt_flags & MNT_NODEV)) {
2100 return -EPERM; 2105 /* Was the nodev implicitly added in mount? */
2106 if ((mnt->mnt_ns->user_ns != &init_user_ns) &&
2107 !(sb->s_type->fs_flags & FS_USERNS_DEV_MOUNT)) {
2108 mnt_flags |= MNT_NODEV;
2109 } else {
2110 return -EPERM;
2111 }
2101 } 2112 }
2102 if ((mnt->mnt.mnt_flags & MNT_LOCK_NOSUID) && 2113 if ((mnt->mnt.mnt_flags & MNT_LOCK_NOSUID) &&
2103 !(mnt_flags & MNT_NOSUID)) { 2114 !(mnt_flags & MNT_NOSUID)) {
@@ -2958,6 +2969,8 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
2958 /* mount new_root on / */ 2969 /* mount new_root on / */
2959 attach_mnt(new_mnt, real_mount(root_parent.mnt), root_mp); 2970 attach_mnt(new_mnt, real_mount(root_parent.mnt), root_mp);
2960 touch_mnt_namespace(current->nsproxy->mnt_ns); 2971 touch_mnt_namespace(current->nsproxy->mnt_ns);
2972 /* A moved mount should not expire automatically */
2973 list_del_init(&new_mnt->mnt_expire);
2961 unlock_mount_hash(); 2974 unlock_mount_hash();
2962 chroot_fs_refs(&root, &new); 2975 chroot_fs_refs(&root, &new);
2963 put_mountpoint(root_mp); 2976 put_mountpoint(root_mp);
@@ -3002,6 +3015,7 @@ static void __init init_mount_tree(void)
3002 3015
3003 root.mnt = mnt; 3016 root.mnt = mnt;
3004 root.dentry = mnt->mnt_root; 3017 root.dentry = mnt->mnt_root;
3018 mnt->mnt_flags |= MNT_LOCKED;
3005 3019
3006 set_fs_pwd(current->fs, &root); 3020 set_fs_pwd(current->fs, &root);
3007 set_fs_root(current->fs, &root); 3021 set_fs_root(current->fs, &root);
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index a93bf9892256..fcae9ef1a328 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -5662,7 +5662,7 @@ int ocfs2_remove_btree_range(struct inode *inode,
5662 struct ocfs2_extent_tree *et, 5662 struct ocfs2_extent_tree *et,
5663 u32 cpos, u32 phys_cpos, u32 len, int flags, 5663 u32 cpos, u32 phys_cpos, u32 len, int flags,
5664 struct ocfs2_cached_dealloc_ctxt *dealloc, 5664 struct ocfs2_cached_dealloc_ctxt *dealloc,
5665 u64 refcount_loc) 5665 u64 refcount_loc, bool refcount_tree_locked)
5666{ 5666{
5667 int ret, credits = 0, extra_blocks = 0; 5667 int ret, credits = 0, extra_blocks = 0;
5668 u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); 5668 u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
@@ -5676,11 +5676,13 @@ int ocfs2_remove_btree_range(struct inode *inode,
5676 BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & 5676 BUG_ON(!(OCFS2_I(inode)->ip_dyn_features &
5677 OCFS2_HAS_REFCOUNT_FL)); 5677 OCFS2_HAS_REFCOUNT_FL));
5678 5678
5679 ret = ocfs2_lock_refcount_tree(osb, refcount_loc, 1, 5679 if (!refcount_tree_locked) {
5680 &ref_tree, NULL); 5680 ret = ocfs2_lock_refcount_tree(osb, refcount_loc, 1,
5681 if (ret) { 5681 &ref_tree, NULL);
5682 mlog_errno(ret); 5682 if (ret) {
5683 goto bail; 5683 mlog_errno(ret);
5684 goto bail;
5685 }
5684 } 5686 }
5685 5687
5686 ret = ocfs2_prepare_refcount_change_for_del(inode, 5688 ret = ocfs2_prepare_refcount_change_for_del(inode,
@@ -7021,6 +7023,7 @@ int ocfs2_commit_truncate(struct ocfs2_super *osb,
7021 u64 refcount_loc = le64_to_cpu(di->i_refcount_loc); 7023 u64 refcount_loc = le64_to_cpu(di->i_refcount_loc);
7022 struct ocfs2_extent_tree et; 7024 struct ocfs2_extent_tree et;
7023 struct ocfs2_cached_dealloc_ctxt dealloc; 7025 struct ocfs2_cached_dealloc_ctxt dealloc;
7026 struct ocfs2_refcount_tree *ref_tree = NULL;
7024 7027
7025 ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh); 7028 ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh);
7026 ocfs2_init_dealloc_ctxt(&dealloc); 7029 ocfs2_init_dealloc_ctxt(&dealloc);
@@ -7130,9 +7133,18 @@ start:
7130 7133
7131 phys_cpos = ocfs2_blocks_to_clusters(inode->i_sb, blkno); 7134 phys_cpos = ocfs2_blocks_to_clusters(inode->i_sb, blkno);
7132 7135
7136 if ((flags & OCFS2_EXT_REFCOUNTED) && trunc_len && !ref_tree) {
7137 status = ocfs2_lock_refcount_tree(osb, refcount_loc, 1,
7138 &ref_tree, NULL);
7139 if (status) {
7140 mlog_errno(status);
7141 goto bail;
7142 }
7143 }
7144
7133 status = ocfs2_remove_btree_range(inode, &et, trunc_cpos, 7145 status = ocfs2_remove_btree_range(inode, &et, trunc_cpos,
7134 phys_cpos, trunc_len, flags, &dealloc, 7146 phys_cpos, trunc_len, flags, &dealloc,
7135 refcount_loc); 7147 refcount_loc, true);
7136 if (status < 0) { 7148 if (status < 0) {
7137 mlog_errno(status); 7149 mlog_errno(status);
7138 goto bail; 7150 goto bail;
@@ -7147,6 +7159,8 @@ start:
7147 goto start; 7159 goto start;
7148 7160
7149bail: 7161bail:
7162 if (ref_tree)
7163 ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
7150 7164
7151 ocfs2_schedule_truncate_log_flush(osb, 1); 7165 ocfs2_schedule_truncate_log_flush(osb, 1);
7152 7166
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index ca381c584127..fb09b97db162 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -142,7 +142,7 @@ int ocfs2_remove_btree_range(struct inode *inode,
142 struct ocfs2_extent_tree *et, 142 struct ocfs2_extent_tree *et,
143 u32 cpos, u32 phys_cpos, u32 len, int flags, 143 u32 cpos, u32 phys_cpos, u32 len, int flags,
144 struct ocfs2_cached_dealloc_ctxt *dealloc, 144 struct ocfs2_cached_dealloc_ctxt *dealloc,
145 u64 refcount_loc); 145 u64 refcount_loc, bool refcount_tree_locked);
146 146
147int ocfs2_num_free_extents(struct ocfs2_super *osb, 147int ocfs2_num_free_extents(struct ocfs2_super *osb,
148 struct ocfs2_extent_tree *et); 148 struct ocfs2_extent_tree *et);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index d9f222987f24..46d93e941f3d 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -894,7 +894,7 @@ void ocfs2_unlock_and_free_pages(struct page **pages, int num_pages)
894 } 894 }
895} 895}
896 896
897static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc) 897static void ocfs2_unlock_pages(struct ocfs2_write_ctxt *wc)
898{ 898{
899 int i; 899 int i;
900 900
@@ -915,7 +915,11 @@ static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc)
915 page_cache_release(wc->w_target_page); 915 page_cache_release(wc->w_target_page);
916 } 916 }
917 ocfs2_unlock_and_free_pages(wc->w_pages, wc->w_num_pages); 917 ocfs2_unlock_and_free_pages(wc->w_pages, wc->w_num_pages);
918}
918 919
920static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc)
921{
922 ocfs2_unlock_pages(wc);
919 brelse(wc->w_di_bh); 923 brelse(wc->w_di_bh);
920 kfree(wc); 924 kfree(wc);
921} 925}
@@ -2042,11 +2046,19 @@ out_write_size:
2042 ocfs2_update_inode_fsync_trans(handle, inode, 1); 2046 ocfs2_update_inode_fsync_trans(handle, inode, 1);
2043 ocfs2_journal_dirty(handle, wc->w_di_bh); 2047 ocfs2_journal_dirty(handle, wc->w_di_bh);
2044 2048
2049 /* unlock pages before dealloc since it needs acquiring j_trans_barrier
2050 * lock, or it will cause a deadlock since journal commit threads holds
2051 * this lock and will ask for the page lock when flushing the data.
2052 * put it here to preserve the unlock order.
2053 */
2054 ocfs2_unlock_pages(wc);
2055
2045 ocfs2_commit_trans(osb, handle); 2056 ocfs2_commit_trans(osb, handle);
2046 2057
2047 ocfs2_run_deallocs(osb, &wc->w_dealloc); 2058 ocfs2_run_deallocs(osb, &wc->w_dealloc);
2048 2059
2049 ocfs2_free_write_ctxt(wc); 2060 brelse(wc->w_di_bh);
2061 kfree(wc);
2050 2062
2051 return copied; 2063 return copied;
2052} 2064}
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 79d56dc981bc..319e786175af 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -4479,7 +4479,7 @@ int ocfs2_dx_dir_truncate(struct inode *dir, struct buffer_head *di_bh)
4479 p_cpos = ocfs2_blocks_to_clusters(dir->i_sb, blkno); 4479 p_cpos = ocfs2_blocks_to_clusters(dir->i_sb, blkno);
4480 4480
4481 ret = ocfs2_remove_btree_range(dir, &et, cpos, p_cpos, clen, 0, 4481 ret = ocfs2_remove_btree_range(dir, &et, cpos, p_cpos, clen, 0,
4482 &dealloc, 0); 4482 &dealloc, 0, false);
4483 if (ret) { 4483 if (ret) {
4484 mlog_errno(ret); 4484 mlog_errno(ret);
4485 goto out; 4485 goto out;
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 3689b3592042..a6944b25fd5b 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -695,14 +695,6 @@ void __dlm_lockres_grab_inflight_worker(struct dlm_ctxt *dlm,
695 res->inflight_assert_workers); 695 res->inflight_assert_workers);
696} 696}
697 697
698static void dlm_lockres_grab_inflight_worker(struct dlm_ctxt *dlm,
699 struct dlm_lock_resource *res)
700{
701 spin_lock(&res->spinlock);
702 __dlm_lockres_grab_inflight_worker(dlm, res);
703 spin_unlock(&res->spinlock);
704}
705
706static void __dlm_lockres_drop_inflight_worker(struct dlm_ctxt *dlm, 698static void __dlm_lockres_drop_inflight_worker(struct dlm_ctxt *dlm,
707 struct dlm_lock_resource *res) 699 struct dlm_lock_resource *res)
708{ 700{
@@ -1646,6 +1638,7 @@ send_response:
1646 } 1638 }
1647 mlog(0, "%u is the owner of %.*s, cleaning everyone else\n", 1639 mlog(0, "%u is the owner of %.*s, cleaning everyone else\n",
1648 dlm->node_num, res->lockname.len, res->lockname.name); 1640 dlm->node_num, res->lockname.len, res->lockname.name);
1641 spin_lock(&res->spinlock);
1649 ret = dlm_dispatch_assert_master(dlm, res, 0, request->node_idx, 1642 ret = dlm_dispatch_assert_master(dlm, res, 0, request->node_idx,
1650 DLM_ASSERT_MASTER_MLE_CLEANUP); 1643 DLM_ASSERT_MASTER_MLE_CLEANUP);
1651 if (ret < 0) { 1644 if (ret < 0) {
@@ -1653,7 +1646,8 @@ send_response:
1653 response = DLM_MASTER_RESP_ERROR; 1646 response = DLM_MASTER_RESP_ERROR;
1654 dlm_lockres_put(res); 1647 dlm_lockres_put(res);
1655 } else 1648 } else
1656 dlm_lockres_grab_inflight_worker(dlm, res); 1649 __dlm_lockres_grab_inflight_worker(dlm, res);
1650 spin_unlock(&res->spinlock);
1657 } else { 1651 } else {
1658 if (res) 1652 if (res)
1659 dlm_lockres_put(res); 1653 dlm_lockres_put(res);
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 69fb9f75b082..3950693dd0f6 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1803,7 +1803,7 @@ static int ocfs2_remove_inode_range(struct inode *inode,
1803 1803
1804 ret = ocfs2_remove_btree_range(inode, &et, trunc_cpos, 1804 ret = ocfs2_remove_btree_range(inode, &et, trunc_cpos,
1805 phys_cpos, trunc_len, flags, 1805 phys_cpos, trunc_len, flags,
1806 &dealloc, refcount_loc); 1806 &dealloc, refcount_loc, false);
1807 if (ret < 0) { 1807 if (ret < 0) {
1808 mlog_errno(ret); 1808 mlog_errno(ret);
1809 goto out; 1809 goto out;
diff --git a/fs/pnode.c b/fs/pnode.c
index aae331a5d03b..260ac8f898a4 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -242,6 +242,7 @@ static int propagate_one(struct mount *m)
242 child = copy_tree(last_source, last_source->mnt.mnt_root, type); 242 child = copy_tree(last_source, last_source->mnt.mnt_root, type);
243 if (IS_ERR(child)) 243 if (IS_ERR(child))
244 return PTR_ERR(child); 244 return PTR_ERR(child);
245 child->mnt.mnt_flags &= ~MNT_LOCKED;
245 mnt_set_mountpoint(m, mp, child); 246 mnt_set_mountpoint(m, mp, child);
246 last_dest = m; 247 last_dest = m;
247 last_source = child; 248 last_source = child;
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 590aeda5af12..3f3d7aeb0712 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2464,6 +2464,57 @@ static const struct file_operations proc_projid_map_operations = {
2464 .llseek = seq_lseek, 2464 .llseek = seq_lseek,
2465 .release = proc_id_map_release, 2465 .release = proc_id_map_release,
2466}; 2466};
2467
2468static int proc_setgroups_open(struct inode *inode, struct file *file)
2469{
2470 struct user_namespace *ns = NULL;
2471 struct task_struct *task;
2472 int ret;
2473
2474 ret = -ESRCH;
2475 task = get_proc_task(inode);
2476 if (task) {
2477 rcu_read_lock();
2478 ns = get_user_ns(task_cred_xxx(task, user_ns));
2479 rcu_read_unlock();
2480 put_task_struct(task);
2481 }
2482 if (!ns)
2483 goto err;
2484
2485 if (file->f_mode & FMODE_WRITE) {
2486 ret = -EACCES;
2487 if (!ns_capable(ns, CAP_SYS_ADMIN))
2488 goto err_put_ns;
2489 }
2490
2491 ret = single_open(file, &proc_setgroups_show, ns);
2492 if (ret)
2493 goto err_put_ns;
2494
2495 return 0;
2496err_put_ns:
2497 put_user_ns(ns);
2498err:
2499 return ret;
2500}
2501
2502static int proc_setgroups_release(struct inode *inode, struct file *file)
2503{
2504 struct seq_file *seq = file->private_data;
2505 struct user_namespace *ns = seq->private;
2506 int ret = single_release(inode, file);
2507 put_user_ns(ns);
2508 return ret;
2509}
2510
2511static const struct file_operations proc_setgroups_operations = {
2512 .open = proc_setgroups_open,
2513 .write = proc_setgroups_write,
2514 .read = seq_read,
2515 .llseek = seq_lseek,
2516 .release = proc_setgroups_release,
2517};
2467#endif /* CONFIG_USER_NS */ 2518#endif /* CONFIG_USER_NS */
2468 2519
2469static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns, 2520static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns,
@@ -2572,6 +2623,7 @@ static const struct pid_entry tgid_base_stuff[] = {
2572 REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations), 2623 REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations),
2573 REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations), 2624 REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations),
2574 REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations), 2625 REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations),
2626 REG("setgroups", S_IRUGO|S_IWUSR, proc_setgroups_operations),
2575#endif 2627#endif
2576#ifdef CONFIG_CHECKPOINT_RESTORE 2628#ifdef CONFIG_CHECKPOINT_RESTORE
2577 REG("timers", S_IRUGO, proc_timers_operations), 2629 REG("timers", S_IRUGO, proc_timers_operations),
@@ -2916,6 +2968,7 @@ static const struct pid_entry tid_base_stuff[] = {
2916 REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations), 2968 REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations),
2917 REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations), 2969 REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations),
2918 REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations), 2970 REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations),
2971 REG("setgroups", S_IRUGO|S_IWUSR, proc_setgroups_operations),
2919#endif 2972#endif
2920}; 2973};
2921 2974
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index aa1eee06420f..d3ebf2e61853 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -12,6 +12,9 @@
12#include <linux/vmstat.h> 12#include <linux/vmstat.h>
13#include <linux/atomic.h> 13#include <linux/atomic.h>
14#include <linux/vmalloc.h> 14#include <linux/vmalloc.h>
15#ifdef CONFIG_CMA
16#include <linux/cma.h>
17#endif
15#include <asm/page.h> 18#include <asm/page.h>
16#include <asm/pgtable.h> 19#include <asm/pgtable.h>
17#include "internal.h" 20#include "internal.h"
@@ -138,6 +141,10 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
138#ifdef CONFIG_TRANSPARENT_HUGEPAGE 141#ifdef CONFIG_TRANSPARENT_HUGEPAGE
139 "AnonHugePages: %8lu kB\n" 142 "AnonHugePages: %8lu kB\n"
140#endif 143#endif
144#ifdef CONFIG_CMA
145 "CmaTotal: %8lu kB\n"
146 "CmaFree: %8lu kB\n"
147#endif
141 , 148 ,
142 K(i.totalram), 149 K(i.totalram),
143 K(i.freeram), 150 K(i.freeram),
@@ -187,12 +194,16 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
187 vmi.used >> 10, 194 vmi.used >> 10,
188 vmi.largest_chunk >> 10 195 vmi.largest_chunk >> 10
189#ifdef CONFIG_MEMORY_FAILURE 196#ifdef CONFIG_MEMORY_FAILURE
190 ,atomic_long_read(&num_poisoned_pages) << (PAGE_SHIFT - 10) 197 , atomic_long_read(&num_poisoned_pages) << (PAGE_SHIFT - 10)
191#endif 198#endif
192#ifdef CONFIG_TRANSPARENT_HUGEPAGE 199#ifdef CONFIG_TRANSPARENT_HUGEPAGE
193 ,K(global_page_state(NR_ANON_TRANSPARENT_HUGEPAGES) * 200 , K(global_page_state(NR_ANON_TRANSPARENT_HUGEPAGES) *
194 HPAGE_PMD_NR) 201 HPAGE_PMD_NR)
195#endif 202#endif
203#ifdef CONFIG_CMA
204 , K(totalcma_pages)
205 , K(global_page_state(NR_FREE_CMA_PAGES))
206#endif
196 ); 207 );
197 208
198 hugetlb_report_meminfo(m); 209 hugetlb_report_meminfo(m);
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index bf2d03f8fd3e..510413eb25b8 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -159,7 +159,7 @@ static int show_stat(struct seq_file *p, void *v)
159 159
160 /* sum again ? it could be updated? */ 160 /* sum again ? it could be updated? */
161 for_each_irq_nr(j) 161 for_each_irq_nr(j)
162 seq_put_decimal_ull(p, ' ', kstat_irqs(j)); 162 seq_put_decimal_ull(p, ' ', kstat_irqs_usr(j));
163 163
164 seq_printf(p, 164 seq_printf(p,
165 "\nctxt %llu\n" 165 "\nctxt %llu\n"
diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c
index 73ca1740d839..0f96f71ab32b 100644
--- a/fs/proc_namespace.c
+++ b/fs/proc_namespace.c
@@ -91,6 +91,7 @@ static void show_type(struct seq_file *m, struct super_block *sb)
91 91
92static int show_vfsmnt(struct seq_file *m, struct vfsmount *mnt) 92static int show_vfsmnt(struct seq_file *m, struct vfsmount *mnt)
93{ 93{
94 struct proc_mounts *p = proc_mounts(m);
94 struct mount *r = real_mount(mnt); 95 struct mount *r = real_mount(mnt);
95 int err = 0; 96 int err = 0;
96 struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt }; 97 struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
@@ -104,7 +105,10 @@ static int show_vfsmnt(struct seq_file *m, struct vfsmount *mnt)
104 mangle(m, r->mnt_devname ? r->mnt_devname : "none"); 105 mangle(m, r->mnt_devname ? r->mnt_devname : "none");
105 } 106 }
106 seq_putc(m, ' '); 107 seq_putc(m, ' ');
107 seq_path(m, &mnt_path, " \t\n\\"); 108 /* mountpoints outside of chroot jail will give SEQ_SKIP on this */
109 err = seq_path_root(m, &mnt_path, &p->root, " \t\n\\");
110 if (err)
111 goto out;
108 seq_putc(m, ' '); 112 seq_putc(m, ' ');
109 show_type(m, sb); 113 show_type(m, sb);
110 seq_puts(m, __mnt_is_readonly(mnt) ? " ro" : " rw"); 114 seq_puts(m, __mnt_is_readonly(mnt) ? " ro" : " rw");
@@ -125,7 +129,6 @@ static int show_mountinfo(struct seq_file *m, struct vfsmount *mnt)
125 struct mount *r = real_mount(mnt); 129 struct mount *r = real_mount(mnt);
126 struct super_block *sb = mnt->mnt_sb; 130 struct super_block *sb = mnt->mnt_sb;
127 struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt }; 131 struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
128 struct path root = p->root;
129 int err = 0; 132 int err = 0;
130 133
131 seq_printf(m, "%i %i %u:%u ", r->mnt_id, r->mnt_parent->mnt_id, 134 seq_printf(m, "%i %i %u:%u ", r->mnt_id, r->mnt_parent->mnt_id,
@@ -139,7 +142,7 @@ static int show_mountinfo(struct seq_file *m, struct vfsmount *mnt)
139 seq_putc(m, ' '); 142 seq_putc(m, ' ');
140 143
141 /* mountpoints outside of chroot jail will give SEQ_SKIP on this */ 144 /* mountpoints outside of chroot jail will give SEQ_SKIP on this */
142 err = seq_path_root(m, &mnt_path, &root, " \t\n\\"); 145 err = seq_path_root(m, &mnt_path, &p->root, " \t\n\\");
143 if (err) 146 if (err)
144 goto out; 147 goto out;
145 148
@@ -182,6 +185,7 @@ out:
182 185
183static int show_vfsstat(struct seq_file *m, struct vfsmount *mnt) 186static int show_vfsstat(struct seq_file *m, struct vfsmount *mnt)
184{ 187{
188 struct proc_mounts *p = proc_mounts(m);
185 struct mount *r = real_mount(mnt); 189 struct mount *r = real_mount(mnt);
186 struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt }; 190 struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
187 struct super_block *sb = mnt_path.dentry->d_sb; 191 struct super_block *sb = mnt_path.dentry->d_sb;
@@ -201,7 +205,10 @@ static int show_vfsstat(struct seq_file *m, struct vfsmount *mnt)
201 205
202 /* mount point */ 206 /* mount point */
203 seq_puts(m, " mounted on "); 207 seq_puts(m, " mounted on ");
204 seq_path(m, &mnt_path, " \t\n\\"); 208 /* mountpoints outside of chroot jail will give SEQ_SKIP on this */
209 err = seq_path_root(m, &mnt_path, &p->root, " \t\n\\");
210 if (err)
211 goto out;
205 seq_putc(m, ' '); 212 seq_putc(m, ' ');
206 213
207 /* file system type */ 214 /* file system type */
@@ -216,6 +223,7 @@ static int show_vfsstat(struct seq_file *m, struct vfsmount *mnt)
216 } 223 }
217 224
218 seq_putc(m, '\n'); 225 seq_putc(m, '\n');
226out:
219 return err; 227 return err;
220} 228}
221 229