aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ceph/inode.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ceph/inode.c')
-rw-r--r--fs/ceph/inode.c171
1 files changed, 108 insertions, 63 deletions
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 62377ec37edf..d8858e96ab18 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -1,8 +1,7 @@
1#include "ceph_debug.h" 1#include <linux/ceph/ceph_debug.h>
2 2
3#include <linux/module.h> 3#include <linux/module.h>
4#include <linux/fs.h> 4#include <linux/fs.h>
5#include <linux/smp_lock.h>
6#include <linux/slab.h> 5#include <linux/slab.h>
7#include <linux/string.h> 6#include <linux/string.h>
8#include <linux/uaccess.h> 7#include <linux/uaccess.h>
@@ -13,7 +12,8 @@
13#include <linux/pagevec.h> 12#include <linux/pagevec.h>
14 13
15#include "super.h" 14#include "super.h"
16#include "decode.h" 15#include "mds_client.h"
16#include <linux/ceph/decode.h>
17 17
18/* 18/*
19 * Ceph inode operations 19 * Ceph inode operations
@@ -36,6 +36,13 @@ static void ceph_vmtruncate_work(struct work_struct *work);
36/* 36/*
37 * find or create an inode, given the ceph ino number 37 * find or create an inode, given the ceph ino number
38 */ 38 */
39static int ceph_set_ino_cb(struct inode *inode, void *data)
40{
41 ceph_inode(inode)->i_vino = *(struct ceph_vino *)data;
42 inode->i_ino = ceph_vino_to_ino(*(struct ceph_vino *)data);
43 return 0;
44}
45
39struct inode *ceph_get_inode(struct super_block *sb, struct ceph_vino vino) 46struct inode *ceph_get_inode(struct super_block *sb, struct ceph_vino vino)
40{ 47{
41 struct inode *inode; 48 struct inode *inode;
@@ -297,6 +304,8 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
297 ci->i_release_count = 0; 304 ci->i_release_count = 0;
298 ci->i_symlink = NULL; 305 ci->i_symlink = NULL;
299 306
307 memset(&ci->i_dir_layout, 0, sizeof(ci->i_dir_layout));
308
300 ci->i_fragtree = RB_ROOT; 309 ci->i_fragtree = RB_ROOT;
301 mutex_init(&ci->i_fragtree_mutex); 310 mutex_init(&ci->i_fragtree_mutex);
302 311
@@ -346,6 +355,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
346 ci->i_rd_ref = 0; 355 ci->i_rd_ref = 0;
347 ci->i_rdcache_ref = 0; 356 ci->i_rdcache_ref = 0;
348 ci->i_wr_ref = 0; 357 ci->i_wr_ref = 0;
358 ci->i_wb_ref = 0;
349 ci->i_wrbuffer_ref = 0; 359 ci->i_wrbuffer_ref = 0;
350 ci->i_wrbuffer_ref_head = 0; 360 ci->i_wrbuffer_ref_head = 0;
351 ci->i_shared_gen = 0; 361 ci->i_shared_gen = 0;
@@ -368,6 +378,15 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
368 return &ci->vfs_inode; 378 return &ci->vfs_inode;
369} 379}
370 380
381static void ceph_i_callback(struct rcu_head *head)
382{
383 struct inode *inode = container_of(head, struct inode, i_rcu);
384 struct ceph_inode_info *ci = ceph_inode(inode);
385
386 INIT_LIST_HEAD(&inode->i_dentry);
387 kmem_cache_free(ceph_inode_cachep, ci);
388}
389
371void ceph_destroy_inode(struct inode *inode) 390void ceph_destroy_inode(struct inode *inode)
372{ 391{
373 struct ceph_inode_info *ci = ceph_inode(inode); 392 struct ceph_inode_info *ci = ceph_inode(inode);
@@ -384,7 +403,7 @@ void ceph_destroy_inode(struct inode *inode)
384 */ 403 */
385 if (ci->i_snap_realm) { 404 if (ci->i_snap_realm) {
386 struct ceph_mds_client *mdsc = 405 struct ceph_mds_client *mdsc =
387 &ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; 406 ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
388 struct ceph_snap_realm *realm = ci->i_snap_realm; 407 struct ceph_snap_realm *realm = ci->i_snap_realm;
389 408
390 dout(" dropping residual ref to snap realm %p\n", realm); 409 dout(" dropping residual ref to snap realm %p\n", realm);
@@ -407,7 +426,7 @@ void ceph_destroy_inode(struct inode *inode)
407 if (ci->i_xattrs.prealloc_blob) 426 if (ci->i_xattrs.prealloc_blob)
408 ceph_buffer_put(ci->i_xattrs.prealloc_blob); 427 ceph_buffer_put(ci->i_xattrs.prealloc_blob);
409 428
410 kmem_cache_free(ceph_inode_cachep, ci); 429 call_rcu(&inode->i_rcu, ceph_i_callback);
411} 430}
412 431
413 432
@@ -470,7 +489,9 @@ void ceph_fill_file_time(struct inode *inode, int issued,
470 489
471 if (issued & (CEPH_CAP_FILE_EXCL| 490 if (issued & (CEPH_CAP_FILE_EXCL|
472 CEPH_CAP_FILE_WR| 491 CEPH_CAP_FILE_WR|
473 CEPH_CAP_FILE_BUFFER)) { 492 CEPH_CAP_FILE_BUFFER|
493 CEPH_CAP_AUTH_EXCL|
494 CEPH_CAP_XATTR_EXCL)) {
474 if (timespec_compare(ctime, &inode->i_ctime) > 0) { 495 if (timespec_compare(ctime, &inode->i_ctime) > 0) {
475 dout("ctime %ld.%09ld -> %ld.%09ld inc w/ cap\n", 496 dout("ctime %ld.%09ld -> %ld.%09ld inc w/ cap\n",
476 inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec, 497 inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
@@ -510,7 +531,7 @@ void ceph_fill_file_time(struct inode *inode, int issued,
510 warn = 1; 531 warn = 1;
511 } 532 }
512 } else { 533 } else {
513 /* we have no write caps; whatever the MDS says is true */ 534 /* we have no write|excl caps; whatever the MDS says is true */
514 if (ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) >= 0) { 535 if (ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) >= 0) {
515 inode->i_ctime = *ctime; 536 inode->i_ctime = *ctime;
516 inode->i_mtime = *mtime; 537 inode->i_mtime = *mtime;
@@ -566,12 +587,17 @@ static int fill_inode(struct inode *inode,
566 587
567 /* 588 /*
568 * provided version will be odd if inode value is projected, 589 * provided version will be odd if inode value is projected,
569 * even if stable. skip the update if we have a newer info 590 * even if stable. skip the update if we have newer stable
570 * (e.g., due to inode info racing form multiple MDSs), or if 591 * info (ours>=theirs, e.g. due to racing mds replies), unless
571 * we are getting projected (unstable) inode info. 592 * we are getting projected (unstable) info (in which case the
593 * version is odd, and we want ours>theirs).
594 * us them
595 * 2 2 skip
596 * 3 2 skip
597 * 3 3 update
572 */ 598 */
573 if (le64_to_cpu(info->version) > 0 && 599 if (le64_to_cpu(info->version) > 0 &&
574 (ci->i_version & ~1) > le64_to_cpu(info->version)) 600 (ci->i_version & ~1) >= le64_to_cpu(info->version))
575 goto no_change; 601 goto no_change;
576 602
577 issued = __ceph_caps_issued(ci, &implemented); 603 issued = __ceph_caps_issued(ci, &implemented);
@@ -605,7 +631,14 @@ static int fill_inode(struct inode *inode,
605 le32_to_cpu(info->time_warp_seq), 631 le32_to_cpu(info->time_warp_seq),
606 &ctime, &mtime, &atime); 632 &ctime, &mtime, &atime);
607 633
608 ci->i_max_size = le64_to_cpu(info->max_size); 634 /* only update max_size on auth cap */
635 if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) &&
636 ci->i_max_size != le64_to_cpu(info->max_size)) {
637 dout("max_size %lld -> %llu\n", ci->i_max_size,
638 le64_to_cpu(info->max_size));
639 ci->i_max_size = le64_to_cpu(info->max_size);
640 }
641
609 ci->i_layout = info->layout; 642 ci->i_layout = info->layout;
610 inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1; 643 inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1;
611 644
@@ -666,6 +699,8 @@ static int fill_inode(struct inode *inode,
666 inode->i_op = &ceph_dir_iops; 699 inode->i_op = &ceph_dir_iops;
667 inode->i_fop = &ceph_dir_fops; 700 inode->i_fop = &ceph_dir_fops;
668 701
702 ci->i_dir_layout = iinfo->dir_layout;
703
669 ci->i_files = le64_to_cpu(info->files); 704 ci->i_files = le64_to_cpu(info->files);
670 ci->i_subdirs = le64_to_cpu(info->subdirs); 705 ci->i_subdirs = le64_to_cpu(info->subdirs);
671 ci->i_rbytes = le64_to_cpu(info->rbytes); 706 ci->i_rbytes = le64_to_cpu(info->rbytes);
@@ -680,13 +715,9 @@ static int fill_inode(struct inode *inode,
680 (issued & CEPH_CAP_FILE_EXCL) == 0 && 715 (issued & CEPH_CAP_FILE_EXCL) == 0 &&
681 (ci->i_ceph_flags & CEPH_I_COMPLETE) == 0) { 716 (ci->i_ceph_flags & CEPH_I_COMPLETE) == 0) {
682 dout(" marking %p complete (empty)\n", inode); 717 dout(" marking %p complete (empty)\n", inode);
683 ci->i_ceph_flags |= CEPH_I_COMPLETE; 718 /* ci->i_ceph_flags |= CEPH_I_COMPLETE; */
684 ci->i_max_offset = 2; 719 ci->i_max_offset = 2;
685 } 720 }
686
687 /* it may be better to set st_size in getattr instead? */
688 if (ceph_test_opt(ceph_sb_to_client(inode->i_sb), RBYTES))
689 inode->i_size = ci->i_rbytes;
690 break; 721 break;
691 default: 722 default:
692 pr_err("fill_inode %llx.%llx BAD mode 0%o\n", 723 pr_err("fill_inode %llx.%llx BAD mode 0%o\n",
@@ -827,13 +858,13 @@ static void ceph_set_dentry_offset(struct dentry *dn)
827 di->offset = ceph_inode(inode)->i_max_offset++; 858 di->offset = ceph_inode(inode)->i_max_offset++;
828 spin_unlock(&inode->i_lock); 859 spin_unlock(&inode->i_lock);
829 860
830 spin_lock(&dcache_lock); 861 spin_lock(&dir->d_lock);
831 spin_lock(&dn->d_lock); 862 spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED);
832 list_move(&dn->d_u.d_child, &dir->d_subdirs); 863 list_move(&dn->d_u.d_child, &dir->d_subdirs);
833 dout("set_dentry_offset %p %lld (%p %p)\n", dn, di->offset, 864 dout("set_dentry_offset %p %lld (%p %p)\n", dn, di->offset,
834 dn->d_u.d_child.prev, dn->d_u.d_child.next); 865 dn->d_u.d_child.prev, dn->d_u.d_child.next);
835 spin_unlock(&dn->d_lock); 866 spin_unlock(&dn->d_lock);
836 spin_unlock(&dcache_lock); 867 spin_unlock(&dir->d_lock);
837} 868}
838 869
839/* 870/*
@@ -865,8 +896,8 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in,
865 } else if (realdn) { 896 } else if (realdn) {
866 dout("dn %p (%d) spliced with %p (%d) " 897 dout("dn %p (%d) spliced with %p (%d) "
867 "inode %p ino %llx.%llx\n", 898 "inode %p ino %llx.%llx\n",
868 dn, atomic_read(&dn->d_count), 899 dn, dn->d_count,
869 realdn, atomic_read(&realdn->d_count), 900 realdn, realdn->d_count,
870 realdn->d_inode, ceph_vinop(realdn->d_inode)); 901 realdn->d_inode, ceph_vinop(realdn->d_inode));
871 dput(dn); 902 dput(dn);
872 dn = realdn; 903 dn = realdn;
@@ -901,7 +932,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
901 struct inode *in = NULL; 932 struct inode *in = NULL;
902 struct ceph_mds_reply_inode *ininfo; 933 struct ceph_mds_reply_inode *ininfo;
903 struct ceph_vino vino; 934 struct ceph_vino vino;
904 struct ceph_client *client = ceph_sb_to_client(sb); 935 struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
905 int i = 0; 936 int i = 0;
906 int err = 0; 937 int err = 0;
907 938
@@ -965,7 +996,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
965 */ 996 */
966 if (rinfo->head->is_dentry && !req->r_aborted && 997 if (rinfo->head->is_dentry && !req->r_aborted &&
967 (rinfo->head->is_target || strncmp(req->r_dentry->d_name.name, 998 (rinfo->head->is_target || strncmp(req->r_dentry->d_name.name,
968 client->mount_args->snapdir_name, 999 fsc->mount_options->snapdir_name,
969 req->r_dentry->d_name.len))) { 1000 req->r_dentry->d_name.len))) {
970 /* 1001 /*
971 * lookup link rename : null -> possibly existing inode 1002 * lookup link rename : null -> possibly existing inode
@@ -1007,9 +1038,6 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
1007 dout("fill_trace doing d_move %p -> %p\n", 1038 dout("fill_trace doing d_move %p -> %p\n",
1008 req->r_old_dentry, dn); 1039 req->r_old_dentry, dn);
1009 1040
1010 /* d_move screws up d_subdirs order */
1011 ceph_i_clear(dir, CEPH_I_COMPLETE);
1012
1013 d_move(req->r_old_dentry, dn); 1041 d_move(req->r_old_dentry, dn);
1014 dout(" src %p '%.*s' dst %p '%.*s'\n", 1042 dout(" src %p '%.*s' dst %p '%.*s'\n",
1015 req->r_old_dentry, 1043 req->r_old_dentry,
@@ -1021,12 +1049,15 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
1021 rehashing bug in vfs_rename_dir */ 1049 rehashing bug in vfs_rename_dir */
1022 ceph_invalidate_dentry_lease(dn); 1050 ceph_invalidate_dentry_lease(dn);
1023 1051
1024 /* take overwritten dentry's readdir offset */ 1052 /*
1025 dout("dn %p gets %p offset %lld (old offset %lld)\n", 1053 * d_move() puts the renamed dentry at the end of
1026 req->r_old_dentry, dn, ceph_dentry(dn)->offset, 1054 * d_subdirs. We need to assign it an appropriate
1055 * directory offset so we can behave when holding
1056 * I_COMPLETE.
1057 */
1058 ceph_set_dentry_offset(req->r_old_dentry);
1059 dout("dn %p gets new offset %lld\n", req->r_old_dentry,
1027 ceph_dentry(req->r_old_dentry)->offset); 1060 ceph_dentry(req->r_old_dentry)->offset);
1028 ceph_dentry(req->r_old_dentry)->offset =
1029 ceph_dentry(dn)->offset;
1030 1061
1031 dn = req->r_old_dentry; /* use old_dentry */ 1062 dn = req->r_old_dentry; /* use old_dentry */
1032 in = dn->d_inode; 1063 in = dn->d_inode;
@@ -1054,7 +1085,8 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
1054 ininfo = rinfo->targeti.in; 1085 ininfo = rinfo->targeti.in;
1055 vino.ino = le64_to_cpu(ininfo->ino); 1086 vino.ino = le64_to_cpu(ininfo->ino);
1056 vino.snap = le64_to_cpu(ininfo->snapid); 1087 vino.snap = le64_to_cpu(ininfo->snapid);
1057 if (!dn->d_inode) { 1088 in = dn->d_inode;
1089 if (!in) {
1058 in = ceph_get_inode(sb, vino); 1090 in = ceph_get_inode(sb, vino);
1059 if (IS_ERR(in)) { 1091 if (IS_ERR(in)) {
1060 pr_err("fill_trace bad get_inode " 1092 pr_err("fill_trace bad get_inode "
@@ -1069,10 +1101,10 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
1069 goto done; 1101 goto done;
1070 } 1102 }
1071 req->r_dentry = dn; /* may have spliced */ 1103 req->r_dentry = dn; /* may have spliced */
1072 igrab(in); 1104 ihold(in);
1073 } else if (ceph_ino(in) == vino.ino && 1105 } else if (ceph_ino(in) == vino.ino &&
1074 ceph_snap(in) == vino.snap) { 1106 ceph_snap(in) == vino.snap) {
1075 igrab(in); 1107 ihold(in);
1076 } else { 1108 } else {
1077 dout(" %p links to %p %llx.%llx, not %llx.%llx\n", 1109 dout(" %p links to %p %llx.%llx, not %llx.%llx\n",
1078 dn, in, ceph_ino(in), ceph_snap(in), 1110 dn, in, ceph_ino(in), ceph_snap(in),
@@ -1112,7 +1144,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
1112 goto done; 1144 goto done;
1113 } 1145 }
1114 req->r_dentry = dn; /* may have spliced */ 1146 req->r_dentry = dn; /* may have spliced */
1115 igrab(in); 1147 ihold(in);
1116 rinfo->head->is_dentry = 1; /* fool notrace handlers */ 1148 rinfo->head->is_dentry = 1; /* fool notrace handlers */
1117 } 1149 }
1118 1150
@@ -1216,11 +1248,11 @@ retry_lookup:
1216 goto retry_lookup; 1248 goto retry_lookup;
1217 } else { 1249 } else {
1218 /* reorder parent's d_subdirs */ 1250 /* reorder parent's d_subdirs */
1219 spin_lock(&dcache_lock); 1251 spin_lock(&parent->d_lock);
1220 spin_lock(&dn->d_lock); 1252 spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED);
1221 list_move(&dn->d_u.d_child, &parent->d_subdirs); 1253 list_move(&dn->d_u.d_child, &parent->d_subdirs);
1222 spin_unlock(&dn->d_lock); 1254 spin_unlock(&dn->d_lock);
1223 spin_unlock(&dcache_lock); 1255 spin_unlock(&parent->d_lock);
1224 } 1256 }
1225 1257
1226 di = dn->d_fsdata; 1258 di = dn->d_fsdata;
@@ -1296,7 +1328,7 @@ void ceph_queue_writeback(struct inode *inode)
1296 if (queue_work(ceph_inode_to_client(inode)->wb_wq, 1328 if (queue_work(ceph_inode_to_client(inode)->wb_wq,
1297 &ceph_inode(inode)->i_wb_work)) { 1329 &ceph_inode(inode)->i_wb_work)) {
1298 dout("ceph_queue_writeback %p\n", inode); 1330 dout("ceph_queue_writeback %p\n", inode);
1299 igrab(inode); 1331 ihold(inode);
1300 } else { 1332 } else {
1301 dout("ceph_queue_writeback %p failed\n", inode); 1333 dout("ceph_queue_writeback %p failed\n", inode);
1302 } 1334 }
@@ -1321,7 +1353,7 @@ void ceph_queue_invalidate(struct inode *inode)
1321 if (queue_work(ceph_inode_to_client(inode)->pg_inv_wq, 1353 if (queue_work(ceph_inode_to_client(inode)->pg_inv_wq,
1322 &ceph_inode(inode)->i_pg_inv_work)) { 1354 &ceph_inode(inode)->i_pg_inv_work)) {
1323 dout("ceph_queue_invalidate %p\n", inode); 1355 dout("ceph_queue_invalidate %p\n", inode);
1324 igrab(inode); 1356 ihold(inode);
1325 } else { 1357 } else {
1326 dout("ceph_queue_invalidate %p failed\n", inode); 1358 dout("ceph_queue_invalidate %p failed\n", inode);
1327 } 1359 }
@@ -1385,11 +1417,8 @@ static void ceph_invalidate_work(struct work_struct *work)
1385 spin_lock(&inode->i_lock); 1417 spin_lock(&inode->i_lock);
1386 dout("invalidate_pages %p gen %d revoking %d\n", inode, 1418 dout("invalidate_pages %p gen %d revoking %d\n", inode,
1387 ci->i_rdcache_gen, ci->i_rdcache_revoking); 1419 ci->i_rdcache_gen, ci->i_rdcache_revoking);
1388 if (ci->i_rdcache_gen == 0 || 1420 if (ci->i_rdcache_revoking != ci->i_rdcache_gen) {
1389 ci->i_rdcache_revoking != ci->i_rdcache_gen) {
1390 BUG_ON(ci->i_rdcache_revoking > ci->i_rdcache_gen);
1391 /* nevermind! */ 1421 /* nevermind! */
1392 ci->i_rdcache_revoking = 0;
1393 spin_unlock(&inode->i_lock); 1422 spin_unlock(&inode->i_lock);
1394 goto out; 1423 goto out;
1395 } 1424 }
@@ -1399,15 +1428,16 @@ static void ceph_invalidate_work(struct work_struct *work)
1399 ceph_invalidate_nondirty_pages(inode->i_mapping); 1428 ceph_invalidate_nondirty_pages(inode->i_mapping);
1400 1429
1401 spin_lock(&inode->i_lock); 1430 spin_lock(&inode->i_lock);
1402 if (orig_gen == ci->i_rdcache_gen) { 1431 if (orig_gen == ci->i_rdcache_gen &&
1432 orig_gen == ci->i_rdcache_revoking) {
1403 dout("invalidate_pages %p gen %d successful\n", inode, 1433 dout("invalidate_pages %p gen %d successful\n", inode,
1404 ci->i_rdcache_gen); 1434 ci->i_rdcache_gen);
1405 ci->i_rdcache_gen = 0; 1435 ci->i_rdcache_revoking--;
1406 ci->i_rdcache_revoking = 0;
1407 check = 1; 1436 check = 1;
1408 } else { 1437 } else {
1409 dout("invalidate_pages %p gen %d raced, gen now %d\n", 1438 dout("invalidate_pages %p gen %d raced, now %d revoking %d\n",
1410 inode, orig_gen, ci->i_rdcache_gen); 1439 inode, orig_gen, ci->i_rdcache_gen,
1440 ci->i_rdcache_revoking);
1411 } 1441 }
1412 spin_unlock(&inode->i_lock); 1442 spin_unlock(&inode->i_lock);
1413 1443
@@ -1447,7 +1477,7 @@ void ceph_queue_vmtruncate(struct inode *inode)
1447 if (queue_work(ceph_sb_to_client(inode->i_sb)->trunc_wq, 1477 if (queue_work(ceph_sb_to_client(inode->i_sb)->trunc_wq,
1448 &ci->i_vmtruncate_work)) { 1478 &ci->i_vmtruncate_work)) {
1449 dout("ceph_queue_vmtruncate %p\n", inode); 1479 dout("ceph_queue_vmtruncate %p\n", inode);
1450 igrab(inode); 1480 ihold(inode);
1451 } else { 1481 } else {
1452 dout("ceph_queue_vmtruncate %p failed, pending=%d\n", 1482 dout("ceph_queue_vmtruncate %p failed, pending=%d\n",
1453 inode, ci->i_truncate_pending); 1483 inode, ci->i_truncate_pending);
@@ -1533,11 +1563,12 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
1533 struct inode *parent_inode = dentry->d_parent->d_inode; 1563 struct inode *parent_inode = dentry->d_parent->d_inode;
1534 const unsigned int ia_valid = attr->ia_valid; 1564 const unsigned int ia_valid = attr->ia_valid;
1535 struct ceph_mds_request *req; 1565 struct ceph_mds_request *req;
1536 struct ceph_mds_client *mdsc = &ceph_sb_to_client(dentry->d_sb)->mdsc; 1566 struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc;
1537 int issued; 1567 int issued;
1538 int release = 0, dirtied = 0; 1568 int release = 0, dirtied = 0;
1539 int mask = 0; 1569 int mask = 0;
1540 int err = 0; 1570 int err = 0;
1571 int inode_dirty_flags = 0;
1541 1572
1542 if (ceph_snap(inode) != CEPH_NOSNAP) 1573 if (ceph_snap(inode) != CEPH_NOSNAP)
1543 return -EROFS; 1574 return -EROFS;
@@ -1696,15 +1727,19 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
1696 dout("setattr %p ATTR_FILE ... hrm!\n", inode); 1727 dout("setattr %p ATTR_FILE ... hrm!\n", inode);
1697 1728
1698 if (dirtied) { 1729 if (dirtied) {
1699 __ceph_mark_dirty_caps(ci, dirtied); 1730 inode_dirty_flags = __ceph_mark_dirty_caps(ci, dirtied);
1700 inode->i_ctime = CURRENT_TIME; 1731 inode->i_ctime = CURRENT_TIME;
1701 } 1732 }
1702 1733
1703 release &= issued; 1734 release &= issued;
1704 spin_unlock(&inode->i_lock); 1735 spin_unlock(&inode->i_lock);
1705 1736
1737 if (inode_dirty_flags)
1738 __mark_inode_dirty(inode, inode_dirty_flags);
1739
1706 if (mask) { 1740 if (mask) {
1707 req->r_inode = igrab(inode); 1741 req->r_inode = inode;
1742 ihold(inode);
1708 req->r_inode_drop = release; 1743 req->r_inode_drop = release;
1709 req->r_args.setattr.mask = cpu_to_le32(mask); 1744 req->r_args.setattr.mask = cpu_to_le32(mask);
1710 req->r_num_caps = 1; 1745 req->r_num_caps = 1;
@@ -1728,8 +1763,8 @@ out:
1728 */ 1763 */
1729int ceph_do_getattr(struct inode *inode, int mask) 1764int ceph_do_getattr(struct inode *inode, int mask)
1730{ 1765{
1731 struct ceph_client *client = ceph_sb_to_client(inode->i_sb); 1766 struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb);
1732 struct ceph_mds_client *mdsc = &client->mdsc; 1767 struct ceph_mds_client *mdsc = fsc->mdsc;
1733 struct ceph_mds_request *req; 1768 struct ceph_mds_request *req;
1734 int err; 1769 int err;
1735 1770
@@ -1738,14 +1773,15 @@ int ceph_do_getattr(struct inode *inode, int mask)
1738 return 0; 1773 return 0;
1739 } 1774 }
1740 1775
1741 dout("do_getattr inode %p mask %s\n", inode, ceph_cap_string(mask)); 1776 dout("do_getattr inode %p mask %s mode 0%o\n", inode, ceph_cap_string(mask), inode->i_mode);
1742 if (ceph_caps_issued_mask(ceph_inode(inode), mask, 1)) 1777 if (ceph_caps_issued_mask(ceph_inode(inode), mask, 1))
1743 return 0; 1778 return 0;
1744 1779
1745 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS); 1780 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS);
1746 if (IS_ERR(req)) 1781 if (IS_ERR(req))
1747 return PTR_ERR(req); 1782 return PTR_ERR(req);
1748 req->r_inode = igrab(inode); 1783 req->r_inode = inode;
1784 ihold(inode);
1749 req->r_num_caps = 1; 1785 req->r_num_caps = 1;
1750 req->r_args.getattr.mask = cpu_to_le32(mask); 1786 req->r_args.getattr.mask = cpu_to_le32(mask);
1751 err = ceph_mdsc_do_request(mdsc, NULL, req); 1787 err = ceph_mdsc_do_request(mdsc, NULL, req);
@@ -1759,12 +1795,17 @@ int ceph_do_getattr(struct inode *inode, int mask)
1759 * Check inode permissions. We verify we have a valid value for 1795 * Check inode permissions. We verify we have a valid value for
1760 * the AUTH cap, then call the generic handler. 1796 * the AUTH cap, then call the generic handler.
1761 */ 1797 */
1762int ceph_permission(struct inode *inode, int mask) 1798int ceph_permission(struct inode *inode, int mask, unsigned int flags)
1763{ 1799{
1764 int err = ceph_do_getattr(inode, CEPH_CAP_AUTH_SHARED); 1800 int err;
1801
1802 if (flags & IPERM_FLAG_RCU)
1803 return -ECHILD;
1804
1805 err = ceph_do_getattr(inode, CEPH_CAP_AUTH_SHARED);
1765 1806
1766 if (!err) 1807 if (!err)
1767 err = generic_permission(inode, mask, NULL); 1808 err = generic_permission(inode, mask, flags, NULL);
1768 return err; 1809 return err;
1769} 1810}
1770 1811
@@ -1782,13 +1823,17 @@ int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry,
1782 err = ceph_do_getattr(inode, CEPH_STAT_CAP_INODE_ALL); 1823 err = ceph_do_getattr(inode, CEPH_STAT_CAP_INODE_ALL);
1783 if (!err) { 1824 if (!err) {
1784 generic_fillattr(inode, stat); 1825 generic_fillattr(inode, stat);
1785 stat->ino = inode->i_ino; 1826 stat->ino = ceph_translate_ino(inode->i_sb, inode->i_ino);
1786 if (ceph_snap(inode) != CEPH_NOSNAP) 1827 if (ceph_snap(inode) != CEPH_NOSNAP)
1787 stat->dev = ceph_snap(inode); 1828 stat->dev = ceph_snap(inode);
1788 else 1829 else
1789 stat->dev = 0; 1830 stat->dev = 0;
1790 if (S_ISDIR(inode->i_mode)) { 1831 if (S_ISDIR(inode->i_mode)) {
1791 stat->size = ci->i_rbytes; 1832 if (ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb),
1833 RBYTES))
1834 stat->size = ci->i_rbytes;
1835 else
1836 stat->size = ci->i_files + ci->i_subdirs;
1792 stat->blocks = 0; 1837 stat->blocks = 0;
1793 stat->blksize = 65536; 1838 stat->blksize = 65536;
1794 } 1839 }