diff options
Diffstat (limited to 'fs/ceph/inode.c')
-rw-r--r-- | fs/ceph/inode.c | 171 |
1 files changed, 108 insertions, 63 deletions
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 62377ec37edf..d8858e96ab18 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -1,8 +1,7 @@ | |||
1 | #include "ceph_debug.h" | 1 | #include <linux/ceph/ceph_debug.h> |
2 | 2 | ||
3 | #include <linux/module.h> | 3 | #include <linux/module.h> |
4 | #include <linux/fs.h> | 4 | #include <linux/fs.h> |
5 | #include <linux/smp_lock.h> | ||
6 | #include <linux/slab.h> | 5 | #include <linux/slab.h> |
7 | #include <linux/string.h> | 6 | #include <linux/string.h> |
8 | #include <linux/uaccess.h> | 7 | #include <linux/uaccess.h> |
@@ -13,7 +12,8 @@ | |||
13 | #include <linux/pagevec.h> | 12 | #include <linux/pagevec.h> |
14 | 13 | ||
15 | #include "super.h" | 14 | #include "super.h" |
16 | #include "decode.h" | 15 | #include "mds_client.h" |
16 | #include <linux/ceph/decode.h> | ||
17 | 17 | ||
18 | /* | 18 | /* |
19 | * Ceph inode operations | 19 | * Ceph inode operations |
@@ -36,6 +36,13 @@ static void ceph_vmtruncate_work(struct work_struct *work); | |||
36 | /* | 36 | /* |
37 | * find or create an inode, given the ceph ino number | 37 | * find or create an inode, given the ceph ino number |
38 | */ | 38 | */ |
39 | static int ceph_set_ino_cb(struct inode *inode, void *data) | ||
40 | { | ||
41 | ceph_inode(inode)->i_vino = *(struct ceph_vino *)data; | ||
42 | inode->i_ino = ceph_vino_to_ino(*(struct ceph_vino *)data); | ||
43 | return 0; | ||
44 | } | ||
45 | |||
39 | struct inode *ceph_get_inode(struct super_block *sb, struct ceph_vino vino) | 46 | struct inode *ceph_get_inode(struct super_block *sb, struct ceph_vino vino) |
40 | { | 47 | { |
41 | struct inode *inode; | 48 | struct inode *inode; |
@@ -297,6 +304,8 @@ struct inode *ceph_alloc_inode(struct super_block *sb) | |||
297 | ci->i_release_count = 0; | 304 | ci->i_release_count = 0; |
298 | ci->i_symlink = NULL; | 305 | ci->i_symlink = NULL; |
299 | 306 | ||
307 | memset(&ci->i_dir_layout, 0, sizeof(ci->i_dir_layout)); | ||
308 | |||
300 | ci->i_fragtree = RB_ROOT; | 309 | ci->i_fragtree = RB_ROOT; |
301 | mutex_init(&ci->i_fragtree_mutex); | 310 | mutex_init(&ci->i_fragtree_mutex); |
302 | 311 | ||
@@ -346,6 +355,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb) | |||
346 | ci->i_rd_ref = 0; | 355 | ci->i_rd_ref = 0; |
347 | ci->i_rdcache_ref = 0; | 356 | ci->i_rdcache_ref = 0; |
348 | ci->i_wr_ref = 0; | 357 | ci->i_wr_ref = 0; |
358 | ci->i_wb_ref = 0; | ||
349 | ci->i_wrbuffer_ref = 0; | 359 | ci->i_wrbuffer_ref = 0; |
350 | ci->i_wrbuffer_ref_head = 0; | 360 | ci->i_wrbuffer_ref_head = 0; |
351 | ci->i_shared_gen = 0; | 361 | ci->i_shared_gen = 0; |
@@ -368,6 +378,15 @@ struct inode *ceph_alloc_inode(struct super_block *sb) | |||
368 | return &ci->vfs_inode; | 378 | return &ci->vfs_inode; |
369 | } | 379 | } |
370 | 380 | ||
381 | static void ceph_i_callback(struct rcu_head *head) | ||
382 | { | ||
383 | struct inode *inode = container_of(head, struct inode, i_rcu); | ||
384 | struct ceph_inode_info *ci = ceph_inode(inode); | ||
385 | |||
386 | INIT_LIST_HEAD(&inode->i_dentry); | ||
387 | kmem_cache_free(ceph_inode_cachep, ci); | ||
388 | } | ||
389 | |||
371 | void ceph_destroy_inode(struct inode *inode) | 390 | void ceph_destroy_inode(struct inode *inode) |
372 | { | 391 | { |
373 | struct ceph_inode_info *ci = ceph_inode(inode); | 392 | struct ceph_inode_info *ci = ceph_inode(inode); |
@@ -384,7 +403,7 @@ void ceph_destroy_inode(struct inode *inode) | |||
384 | */ | 403 | */ |
385 | if (ci->i_snap_realm) { | 404 | if (ci->i_snap_realm) { |
386 | struct ceph_mds_client *mdsc = | 405 | struct ceph_mds_client *mdsc = |
387 | &ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; | 406 | ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; |
388 | struct ceph_snap_realm *realm = ci->i_snap_realm; | 407 | struct ceph_snap_realm *realm = ci->i_snap_realm; |
389 | 408 | ||
390 | dout(" dropping residual ref to snap realm %p\n", realm); | 409 | dout(" dropping residual ref to snap realm %p\n", realm); |
@@ -407,7 +426,7 @@ void ceph_destroy_inode(struct inode *inode) | |||
407 | if (ci->i_xattrs.prealloc_blob) | 426 | if (ci->i_xattrs.prealloc_blob) |
408 | ceph_buffer_put(ci->i_xattrs.prealloc_blob); | 427 | ceph_buffer_put(ci->i_xattrs.prealloc_blob); |
409 | 428 | ||
410 | kmem_cache_free(ceph_inode_cachep, ci); | 429 | call_rcu(&inode->i_rcu, ceph_i_callback); |
411 | } | 430 | } |
412 | 431 | ||
413 | 432 | ||
@@ -470,7 +489,9 @@ void ceph_fill_file_time(struct inode *inode, int issued, | |||
470 | 489 | ||
471 | if (issued & (CEPH_CAP_FILE_EXCL| | 490 | if (issued & (CEPH_CAP_FILE_EXCL| |
472 | CEPH_CAP_FILE_WR| | 491 | CEPH_CAP_FILE_WR| |
473 | CEPH_CAP_FILE_BUFFER)) { | 492 | CEPH_CAP_FILE_BUFFER| |
493 | CEPH_CAP_AUTH_EXCL| | ||
494 | CEPH_CAP_XATTR_EXCL)) { | ||
474 | if (timespec_compare(ctime, &inode->i_ctime) > 0) { | 495 | if (timespec_compare(ctime, &inode->i_ctime) > 0) { |
475 | dout("ctime %ld.%09ld -> %ld.%09ld inc w/ cap\n", | 496 | dout("ctime %ld.%09ld -> %ld.%09ld inc w/ cap\n", |
476 | inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec, | 497 | inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec, |
@@ -510,7 +531,7 @@ void ceph_fill_file_time(struct inode *inode, int issued, | |||
510 | warn = 1; | 531 | warn = 1; |
511 | } | 532 | } |
512 | } else { | 533 | } else { |
513 | /* we have no write caps; whatever the MDS says is true */ | 534 | /* we have no write|excl caps; whatever the MDS says is true */ |
514 | if (ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) >= 0) { | 535 | if (ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) >= 0) { |
515 | inode->i_ctime = *ctime; | 536 | inode->i_ctime = *ctime; |
516 | inode->i_mtime = *mtime; | 537 | inode->i_mtime = *mtime; |
@@ -566,12 +587,17 @@ static int fill_inode(struct inode *inode, | |||
566 | 587 | ||
567 | /* | 588 | /* |
568 | * provided version will be odd if inode value is projected, | 589 | * provided version will be odd if inode value is projected, |
569 | * even if stable. skip the update if we have a newer info | 590 | * even if stable. skip the update if we have newer stable |
570 | * (e.g., due to inode info racing form multiple MDSs), or if | 591 | * info (ours>=theirs, e.g. due to racing mds replies), unless |
571 | * we are getting projected (unstable) inode info. | 592 | * we are getting projected (unstable) info (in which case the |
593 | * version is odd, and we want ours>theirs). | ||
594 | * us them | ||
595 | * 2 2 skip | ||
596 | * 3 2 skip | ||
597 | * 3 3 update | ||
572 | */ | 598 | */ |
573 | if (le64_to_cpu(info->version) > 0 && | 599 | if (le64_to_cpu(info->version) > 0 && |
574 | (ci->i_version & ~1) > le64_to_cpu(info->version)) | 600 | (ci->i_version & ~1) >= le64_to_cpu(info->version)) |
575 | goto no_change; | 601 | goto no_change; |
576 | 602 | ||
577 | issued = __ceph_caps_issued(ci, &implemented); | 603 | issued = __ceph_caps_issued(ci, &implemented); |
@@ -605,7 +631,14 @@ static int fill_inode(struct inode *inode, | |||
605 | le32_to_cpu(info->time_warp_seq), | 631 | le32_to_cpu(info->time_warp_seq), |
606 | &ctime, &mtime, &atime); | 632 | &ctime, &mtime, &atime); |
607 | 633 | ||
608 | ci->i_max_size = le64_to_cpu(info->max_size); | 634 | /* only update max_size on auth cap */ |
635 | if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) && | ||
636 | ci->i_max_size != le64_to_cpu(info->max_size)) { | ||
637 | dout("max_size %lld -> %llu\n", ci->i_max_size, | ||
638 | le64_to_cpu(info->max_size)); | ||
639 | ci->i_max_size = le64_to_cpu(info->max_size); | ||
640 | } | ||
641 | |||
609 | ci->i_layout = info->layout; | 642 | ci->i_layout = info->layout; |
610 | inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1; | 643 | inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1; |
611 | 644 | ||
@@ -666,6 +699,8 @@ static int fill_inode(struct inode *inode, | |||
666 | inode->i_op = &ceph_dir_iops; | 699 | inode->i_op = &ceph_dir_iops; |
667 | inode->i_fop = &ceph_dir_fops; | 700 | inode->i_fop = &ceph_dir_fops; |
668 | 701 | ||
702 | ci->i_dir_layout = iinfo->dir_layout; | ||
703 | |||
669 | ci->i_files = le64_to_cpu(info->files); | 704 | ci->i_files = le64_to_cpu(info->files); |
670 | ci->i_subdirs = le64_to_cpu(info->subdirs); | 705 | ci->i_subdirs = le64_to_cpu(info->subdirs); |
671 | ci->i_rbytes = le64_to_cpu(info->rbytes); | 706 | ci->i_rbytes = le64_to_cpu(info->rbytes); |
@@ -680,13 +715,9 @@ static int fill_inode(struct inode *inode, | |||
680 | (issued & CEPH_CAP_FILE_EXCL) == 0 && | 715 | (issued & CEPH_CAP_FILE_EXCL) == 0 && |
681 | (ci->i_ceph_flags & CEPH_I_COMPLETE) == 0) { | 716 | (ci->i_ceph_flags & CEPH_I_COMPLETE) == 0) { |
682 | dout(" marking %p complete (empty)\n", inode); | 717 | dout(" marking %p complete (empty)\n", inode); |
683 | ci->i_ceph_flags |= CEPH_I_COMPLETE; | 718 | /* ci->i_ceph_flags |= CEPH_I_COMPLETE; */ |
684 | ci->i_max_offset = 2; | 719 | ci->i_max_offset = 2; |
685 | } | 720 | } |
686 | |||
687 | /* it may be better to set st_size in getattr instead? */ | ||
688 | if (ceph_test_opt(ceph_sb_to_client(inode->i_sb), RBYTES)) | ||
689 | inode->i_size = ci->i_rbytes; | ||
690 | break; | 721 | break; |
691 | default: | 722 | default: |
692 | pr_err("fill_inode %llx.%llx BAD mode 0%o\n", | 723 | pr_err("fill_inode %llx.%llx BAD mode 0%o\n", |
@@ -827,13 +858,13 @@ static void ceph_set_dentry_offset(struct dentry *dn) | |||
827 | di->offset = ceph_inode(inode)->i_max_offset++; | 858 | di->offset = ceph_inode(inode)->i_max_offset++; |
828 | spin_unlock(&inode->i_lock); | 859 | spin_unlock(&inode->i_lock); |
829 | 860 | ||
830 | spin_lock(&dcache_lock); | 861 | spin_lock(&dir->d_lock); |
831 | spin_lock(&dn->d_lock); | 862 | spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED); |
832 | list_move(&dn->d_u.d_child, &dir->d_subdirs); | 863 | list_move(&dn->d_u.d_child, &dir->d_subdirs); |
833 | dout("set_dentry_offset %p %lld (%p %p)\n", dn, di->offset, | 864 | dout("set_dentry_offset %p %lld (%p %p)\n", dn, di->offset, |
834 | dn->d_u.d_child.prev, dn->d_u.d_child.next); | 865 | dn->d_u.d_child.prev, dn->d_u.d_child.next); |
835 | spin_unlock(&dn->d_lock); | 866 | spin_unlock(&dn->d_lock); |
836 | spin_unlock(&dcache_lock); | 867 | spin_unlock(&dir->d_lock); |
837 | } | 868 | } |
838 | 869 | ||
839 | /* | 870 | /* |
@@ -865,8 +896,8 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in, | |||
865 | } else if (realdn) { | 896 | } else if (realdn) { |
866 | dout("dn %p (%d) spliced with %p (%d) " | 897 | dout("dn %p (%d) spliced with %p (%d) " |
867 | "inode %p ino %llx.%llx\n", | 898 | "inode %p ino %llx.%llx\n", |
868 | dn, atomic_read(&dn->d_count), | 899 | dn, dn->d_count, |
869 | realdn, atomic_read(&realdn->d_count), | 900 | realdn, realdn->d_count, |
870 | realdn->d_inode, ceph_vinop(realdn->d_inode)); | 901 | realdn->d_inode, ceph_vinop(realdn->d_inode)); |
871 | dput(dn); | 902 | dput(dn); |
872 | dn = realdn; | 903 | dn = realdn; |
@@ -901,7 +932,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, | |||
901 | struct inode *in = NULL; | 932 | struct inode *in = NULL; |
902 | struct ceph_mds_reply_inode *ininfo; | 933 | struct ceph_mds_reply_inode *ininfo; |
903 | struct ceph_vino vino; | 934 | struct ceph_vino vino; |
904 | struct ceph_client *client = ceph_sb_to_client(sb); | 935 | struct ceph_fs_client *fsc = ceph_sb_to_client(sb); |
905 | int i = 0; | 936 | int i = 0; |
906 | int err = 0; | 937 | int err = 0; |
907 | 938 | ||
@@ -965,7 +996,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, | |||
965 | */ | 996 | */ |
966 | if (rinfo->head->is_dentry && !req->r_aborted && | 997 | if (rinfo->head->is_dentry && !req->r_aborted && |
967 | (rinfo->head->is_target || strncmp(req->r_dentry->d_name.name, | 998 | (rinfo->head->is_target || strncmp(req->r_dentry->d_name.name, |
968 | client->mount_args->snapdir_name, | 999 | fsc->mount_options->snapdir_name, |
969 | req->r_dentry->d_name.len))) { | 1000 | req->r_dentry->d_name.len))) { |
970 | /* | 1001 | /* |
971 | * lookup link rename : null -> possibly existing inode | 1002 | * lookup link rename : null -> possibly existing inode |
@@ -1007,9 +1038,6 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, | |||
1007 | dout("fill_trace doing d_move %p -> %p\n", | 1038 | dout("fill_trace doing d_move %p -> %p\n", |
1008 | req->r_old_dentry, dn); | 1039 | req->r_old_dentry, dn); |
1009 | 1040 | ||
1010 | /* d_move screws up d_subdirs order */ | ||
1011 | ceph_i_clear(dir, CEPH_I_COMPLETE); | ||
1012 | |||
1013 | d_move(req->r_old_dentry, dn); | 1041 | d_move(req->r_old_dentry, dn); |
1014 | dout(" src %p '%.*s' dst %p '%.*s'\n", | 1042 | dout(" src %p '%.*s' dst %p '%.*s'\n", |
1015 | req->r_old_dentry, | 1043 | req->r_old_dentry, |
@@ -1021,12 +1049,15 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, | |||
1021 | rehashing bug in vfs_rename_dir */ | 1049 | rehashing bug in vfs_rename_dir */ |
1022 | ceph_invalidate_dentry_lease(dn); | 1050 | ceph_invalidate_dentry_lease(dn); |
1023 | 1051 | ||
1024 | /* take overwritten dentry's readdir offset */ | 1052 | /* |
1025 | dout("dn %p gets %p offset %lld (old offset %lld)\n", | 1053 | * d_move() puts the renamed dentry at the end of |
1026 | req->r_old_dentry, dn, ceph_dentry(dn)->offset, | 1054 | * d_subdirs. We need to assign it an appropriate |
1055 | * directory offset so we can behave when holding | ||
1056 | * I_COMPLETE. | ||
1057 | */ | ||
1058 | ceph_set_dentry_offset(req->r_old_dentry); | ||
1059 | dout("dn %p gets new offset %lld\n", req->r_old_dentry, | ||
1027 | ceph_dentry(req->r_old_dentry)->offset); | 1060 | ceph_dentry(req->r_old_dentry)->offset); |
1028 | ceph_dentry(req->r_old_dentry)->offset = | ||
1029 | ceph_dentry(dn)->offset; | ||
1030 | 1061 | ||
1031 | dn = req->r_old_dentry; /* use old_dentry */ | 1062 | dn = req->r_old_dentry; /* use old_dentry */ |
1032 | in = dn->d_inode; | 1063 | in = dn->d_inode; |
@@ -1054,7 +1085,8 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, | |||
1054 | ininfo = rinfo->targeti.in; | 1085 | ininfo = rinfo->targeti.in; |
1055 | vino.ino = le64_to_cpu(ininfo->ino); | 1086 | vino.ino = le64_to_cpu(ininfo->ino); |
1056 | vino.snap = le64_to_cpu(ininfo->snapid); | 1087 | vino.snap = le64_to_cpu(ininfo->snapid); |
1057 | if (!dn->d_inode) { | 1088 | in = dn->d_inode; |
1089 | if (!in) { | ||
1058 | in = ceph_get_inode(sb, vino); | 1090 | in = ceph_get_inode(sb, vino); |
1059 | if (IS_ERR(in)) { | 1091 | if (IS_ERR(in)) { |
1060 | pr_err("fill_trace bad get_inode " | 1092 | pr_err("fill_trace bad get_inode " |
@@ -1069,10 +1101,10 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, | |||
1069 | goto done; | 1101 | goto done; |
1070 | } | 1102 | } |
1071 | req->r_dentry = dn; /* may have spliced */ | 1103 | req->r_dentry = dn; /* may have spliced */ |
1072 | igrab(in); | 1104 | ihold(in); |
1073 | } else if (ceph_ino(in) == vino.ino && | 1105 | } else if (ceph_ino(in) == vino.ino && |
1074 | ceph_snap(in) == vino.snap) { | 1106 | ceph_snap(in) == vino.snap) { |
1075 | igrab(in); | 1107 | ihold(in); |
1076 | } else { | 1108 | } else { |
1077 | dout(" %p links to %p %llx.%llx, not %llx.%llx\n", | 1109 | dout(" %p links to %p %llx.%llx, not %llx.%llx\n", |
1078 | dn, in, ceph_ino(in), ceph_snap(in), | 1110 | dn, in, ceph_ino(in), ceph_snap(in), |
@@ -1112,7 +1144,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, | |||
1112 | goto done; | 1144 | goto done; |
1113 | } | 1145 | } |
1114 | req->r_dentry = dn; /* may have spliced */ | 1146 | req->r_dentry = dn; /* may have spliced */ |
1115 | igrab(in); | 1147 | ihold(in); |
1116 | rinfo->head->is_dentry = 1; /* fool notrace handlers */ | 1148 | rinfo->head->is_dentry = 1; /* fool notrace handlers */ |
1117 | } | 1149 | } |
1118 | 1150 | ||
@@ -1216,11 +1248,11 @@ retry_lookup: | |||
1216 | goto retry_lookup; | 1248 | goto retry_lookup; |
1217 | } else { | 1249 | } else { |
1218 | /* reorder parent's d_subdirs */ | 1250 | /* reorder parent's d_subdirs */ |
1219 | spin_lock(&dcache_lock); | 1251 | spin_lock(&parent->d_lock); |
1220 | spin_lock(&dn->d_lock); | 1252 | spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED); |
1221 | list_move(&dn->d_u.d_child, &parent->d_subdirs); | 1253 | list_move(&dn->d_u.d_child, &parent->d_subdirs); |
1222 | spin_unlock(&dn->d_lock); | 1254 | spin_unlock(&dn->d_lock); |
1223 | spin_unlock(&dcache_lock); | 1255 | spin_unlock(&parent->d_lock); |
1224 | } | 1256 | } |
1225 | 1257 | ||
1226 | di = dn->d_fsdata; | 1258 | di = dn->d_fsdata; |
@@ -1296,7 +1328,7 @@ void ceph_queue_writeback(struct inode *inode) | |||
1296 | if (queue_work(ceph_inode_to_client(inode)->wb_wq, | 1328 | if (queue_work(ceph_inode_to_client(inode)->wb_wq, |
1297 | &ceph_inode(inode)->i_wb_work)) { | 1329 | &ceph_inode(inode)->i_wb_work)) { |
1298 | dout("ceph_queue_writeback %p\n", inode); | 1330 | dout("ceph_queue_writeback %p\n", inode); |
1299 | igrab(inode); | 1331 | ihold(inode); |
1300 | } else { | 1332 | } else { |
1301 | dout("ceph_queue_writeback %p failed\n", inode); | 1333 | dout("ceph_queue_writeback %p failed\n", inode); |
1302 | } | 1334 | } |
@@ -1321,7 +1353,7 @@ void ceph_queue_invalidate(struct inode *inode) | |||
1321 | if (queue_work(ceph_inode_to_client(inode)->pg_inv_wq, | 1353 | if (queue_work(ceph_inode_to_client(inode)->pg_inv_wq, |
1322 | &ceph_inode(inode)->i_pg_inv_work)) { | 1354 | &ceph_inode(inode)->i_pg_inv_work)) { |
1323 | dout("ceph_queue_invalidate %p\n", inode); | 1355 | dout("ceph_queue_invalidate %p\n", inode); |
1324 | igrab(inode); | 1356 | ihold(inode); |
1325 | } else { | 1357 | } else { |
1326 | dout("ceph_queue_invalidate %p failed\n", inode); | 1358 | dout("ceph_queue_invalidate %p failed\n", inode); |
1327 | } | 1359 | } |
@@ -1385,11 +1417,8 @@ static void ceph_invalidate_work(struct work_struct *work) | |||
1385 | spin_lock(&inode->i_lock); | 1417 | spin_lock(&inode->i_lock); |
1386 | dout("invalidate_pages %p gen %d revoking %d\n", inode, | 1418 | dout("invalidate_pages %p gen %d revoking %d\n", inode, |
1387 | ci->i_rdcache_gen, ci->i_rdcache_revoking); | 1419 | ci->i_rdcache_gen, ci->i_rdcache_revoking); |
1388 | if (ci->i_rdcache_gen == 0 || | 1420 | if (ci->i_rdcache_revoking != ci->i_rdcache_gen) { |
1389 | ci->i_rdcache_revoking != ci->i_rdcache_gen) { | ||
1390 | BUG_ON(ci->i_rdcache_revoking > ci->i_rdcache_gen); | ||
1391 | /* nevermind! */ | 1421 | /* nevermind! */ |
1392 | ci->i_rdcache_revoking = 0; | ||
1393 | spin_unlock(&inode->i_lock); | 1422 | spin_unlock(&inode->i_lock); |
1394 | goto out; | 1423 | goto out; |
1395 | } | 1424 | } |
@@ -1399,15 +1428,16 @@ static void ceph_invalidate_work(struct work_struct *work) | |||
1399 | ceph_invalidate_nondirty_pages(inode->i_mapping); | 1428 | ceph_invalidate_nondirty_pages(inode->i_mapping); |
1400 | 1429 | ||
1401 | spin_lock(&inode->i_lock); | 1430 | spin_lock(&inode->i_lock); |
1402 | if (orig_gen == ci->i_rdcache_gen) { | 1431 | if (orig_gen == ci->i_rdcache_gen && |
1432 | orig_gen == ci->i_rdcache_revoking) { | ||
1403 | dout("invalidate_pages %p gen %d successful\n", inode, | 1433 | dout("invalidate_pages %p gen %d successful\n", inode, |
1404 | ci->i_rdcache_gen); | 1434 | ci->i_rdcache_gen); |
1405 | ci->i_rdcache_gen = 0; | 1435 | ci->i_rdcache_revoking--; |
1406 | ci->i_rdcache_revoking = 0; | ||
1407 | check = 1; | 1436 | check = 1; |
1408 | } else { | 1437 | } else { |
1409 | dout("invalidate_pages %p gen %d raced, gen now %d\n", | 1438 | dout("invalidate_pages %p gen %d raced, now %d revoking %d\n", |
1410 | inode, orig_gen, ci->i_rdcache_gen); | 1439 | inode, orig_gen, ci->i_rdcache_gen, |
1440 | ci->i_rdcache_revoking); | ||
1411 | } | 1441 | } |
1412 | spin_unlock(&inode->i_lock); | 1442 | spin_unlock(&inode->i_lock); |
1413 | 1443 | ||
@@ -1447,7 +1477,7 @@ void ceph_queue_vmtruncate(struct inode *inode) | |||
1447 | if (queue_work(ceph_sb_to_client(inode->i_sb)->trunc_wq, | 1477 | if (queue_work(ceph_sb_to_client(inode->i_sb)->trunc_wq, |
1448 | &ci->i_vmtruncate_work)) { | 1478 | &ci->i_vmtruncate_work)) { |
1449 | dout("ceph_queue_vmtruncate %p\n", inode); | 1479 | dout("ceph_queue_vmtruncate %p\n", inode); |
1450 | igrab(inode); | 1480 | ihold(inode); |
1451 | } else { | 1481 | } else { |
1452 | dout("ceph_queue_vmtruncate %p failed, pending=%d\n", | 1482 | dout("ceph_queue_vmtruncate %p failed, pending=%d\n", |
1453 | inode, ci->i_truncate_pending); | 1483 | inode, ci->i_truncate_pending); |
@@ -1533,11 +1563,12 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr) | |||
1533 | struct inode *parent_inode = dentry->d_parent->d_inode; | 1563 | struct inode *parent_inode = dentry->d_parent->d_inode; |
1534 | const unsigned int ia_valid = attr->ia_valid; | 1564 | const unsigned int ia_valid = attr->ia_valid; |
1535 | struct ceph_mds_request *req; | 1565 | struct ceph_mds_request *req; |
1536 | struct ceph_mds_client *mdsc = &ceph_sb_to_client(dentry->d_sb)->mdsc; | 1566 | struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc; |
1537 | int issued; | 1567 | int issued; |
1538 | int release = 0, dirtied = 0; | 1568 | int release = 0, dirtied = 0; |
1539 | int mask = 0; | 1569 | int mask = 0; |
1540 | int err = 0; | 1570 | int err = 0; |
1571 | int inode_dirty_flags = 0; | ||
1541 | 1572 | ||
1542 | if (ceph_snap(inode) != CEPH_NOSNAP) | 1573 | if (ceph_snap(inode) != CEPH_NOSNAP) |
1543 | return -EROFS; | 1574 | return -EROFS; |
@@ -1696,15 +1727,19 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr) | |||
1696 | dout("setattr %p ATTR_FILE ... hrm!\n", inode); | 1727 | dout("setattr %p ATTR_FILE ... hrm!\n", inode); |
1697 | 1728 | ||
1698 | if (dirtied) { | 1729 | if (dirtied) { |
1699 | __ceph_mark_dirty_caps(ci, dirtied); | 1730 | inode_dirty_flags = __ceph_mark_dirty_caps(ci, dirtied); |
1700 | inode->i_ctime = CURRENT_TIME; | 1731 | inode->i_ctime = CURRENT_TIME; |
1701 | } | 1732 | } |
1702 | 1733 | ||
1703 | release &= issued; | 1734 | release &= issued; |
1704 | spin_unlock(&inode->i_lock); | 1735 | spin_unlock(&inode->i_lock); |
1705 | 1736 | ||
1737 | if (inode_dirty_flags) | ||
1738 | __mark_inode_dirty(inode, inode_dirty_flags); | ||
1739 | |||
1706 | if (mask) { | 1740 | if (mask) { |
1707 | req->r_inode = igrab(inode); | 1741 | req->r_inode = inode; |
1742 | ihold(inode); | ||
1708 | req->r_inode_drop = release; | 1743 | req->r_inode_drop = release; |
1709 | req->r_args.setattr.mask = cpu_to_le32(mask); | 1744 | req->r_args.setattr.mask = cpu_to_le32(mask); |
1710 | req->r_num_caps = 1; | 1745 | req->r_num_caps = 1; |
@@ -1728,8 +1763,8 @@ out: | |||
1728 | */ | 1763 | */ |
1729 | int ceph_do_getattr(struct inode *inode, int mask) | 1764 | int ceph_do_getattr(struct inode *inode, int mask) |
1730 | { | 1765 | { |
1731 | struct ceph_client *client = ceph_sb_to_client(inode->i_sb); | 1766 | struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb); |
1732 | struct ceph_mds_client *mdsc = &client->mdsc; | 1767 | struct ceph_mds_client *mdsc = fsc->mdsc; |
1733 | struct ceph_mds_request *req; | 1768 | struct ceph_mds_request *req; |
1734 | int err; | 1769 | int err; |
1735 | 1770 | ||
@@ -1738,14 +1773,15 @@ int ceph_do_getattr(struct inode *inode, int mask) | |||
1738 | return 0; | 1773 | return 0; |
1739 | } | 1774 | } |
1740 | 1775 | ||
1741 | dout("do_getattr inode %p mask %s\n", inode, ceph_cap_string(mask)); | 1776 | dout("do_getattr inode %p mask %s mode 0%o\n", inode, ceph_cap_string(mask), inode->i_mode); |
1742 | if (ceph_caps_issued_mask(ceph_inode(inode), mask, 1)) | 1777 | if (ceph_caps_issued_mask(ceph_inode(inode), mask, 1)) |
1743 | return 0; | 1778 | return 0; |
1744 | 1779 | ||
1745 | req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS); | 1780 | req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS); |
1746 | if (IS_ERR(req)) | 1781 | if (IS_ERR(req)) |
1747 | return PTR_ERR(req); | 1782 | return PTR_ERR(req); |
1748 | req->r_inode = igrab(inode); | 1783 | req->r_inode = inode; |
1784 | ihold(inode); | ||
1749 | req->r_num_caps = 1; | 1785 | req->r_num_caps = 1; |
1750 | req->r_args.getattr.mask = cpu_to_le32(mask); | 1786 | req->r_args.getattr.mask = cpu_to_le32(mask); |
1751 | err = ceph_mdsc_do_request(mdsc, NULL, req); | 1787 | err = ceph_mdsc_do_request(mdsc, NULL, req); |
@@ -1759,12 +1795,17 @@ int ceph_do_getattr(struct inode *inode, int mask) | |||
1759 | * Check inode permissions. We verify we have a valid value for | 1795 | * Check inode permissions. We verify we have a valid value for |
1760 | * the AUTH cap, then call the generic handler. | 1796 | * the AUTH cap, then call the generic handler. |
1761 | */ | 1797 | */ |
1762 | int ceph_permission(struct inode *inode, int mask) | 1798 | int ceph_permission(struct inode *inode, int mask, unsigned int flags) |
1763 | { | 1799 | { |
1764 | int err = ceph_do_getattr(inode, CEPH_CAP_AUTH_SHARED); | 1800 | int err; |
1801 | |||
1802 | if (flags & IPERM_FLAG_RCU) | ||
1803 | return -ECHILD; | ||
1804 | |||
1805 | err = ceph_do_getattr(inode, CEPH_CAP_AUTH_SHARED); | ||
1765 | 1806 | ||
1766 | if (!err) | 1807 | if (!err) |
1767 | err = generic_permission(inode, mask, NULL); | 1808 | err = generic_permission(inode, mask, flags, NULL); |
1768 | return err; | 1809 | return err; |
1769 | } | 1810 | } |
1770 | 1811 | ||
@@ -1782,13 +1823,17 @@ int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry, | |||
1782 | err = ceph_do_getattr(inode, CEPH_STAT_CAP_INODE_ALL); | 1823 | err = ceph_do_getattr(inode, CEPH_STAT_CAP_INODE_ALL); |
1783 | if (!err) { | 1824 | if (!err) { |
1784 | generic_fillattr(inode, stat); | 1825 | generic_fillattr(inode, stat); |
1785 | stat->ino = inode->i_ino; | 1826 | stat->ino = ceph_translate_ino(inode->i_sb, inode->i_ino); |
1786 | if (ceph_snap(inode) != CEPH_NOSNAP) | 1827 | if (ceph_snap(inode) != CEPH_NOSNAP) |
1787 | stat->dev = ceph_snap(inode); | 1828 | stat->dev = ceph_snap(inode); |
1788 | else | 1829 | else |
1789 | stat->dev = 0; | 1830 | stat->dev = 0; |
1790 | if (S_ISDIR(inode->i_mode)) { | 1831 | if (S_ISDIR(inode->i_mode)) { |
1791 | stat->size = ci->i_rbytes; | 1832 | if (ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), |
1833 | RBYTES)) | ||
1834 | stat->size = ci->i_rbytes; | ||
1835 | else | ||
1836 | stat->size = ci->i_files + ci->i_subdirs; | ||
1792 | stat->blocks = 0; | 1837 | stat->blocks = 0; |
1793 | stat->blksize = 65536; | 1838 | stat->blksize = 65536; |
1794 | } | 1839 | } |