aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ceph
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2014-06-13 02:06:23 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2014-06-13 02:06:23 -0400
commit 6d87c225f5d82d29243dc124f1ffcbb0e14ec358 (patch)
tree 7d72e2e6a77ec0911e86911d2ddae62c1b4161cf /fs/ceph
parent 338c09a94b14c449dd53227e9bea44816668c6a5 (diff)
parent 22001f619f29ddf66582d834223dcff4c0b74595 (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
Pull Ceph updates from Sage Weil:
 "This has a mix of bug fixes and cleanups.

  Alex's patch fixes a rare race in RBD. Ilya's patches fix an ENOENT check when a second rbd image is mapped and a couple memory leaks. Zheng fixes several issues with fragmented directories and multiple MDSs. Josh fixes a spin/sleep issue, and Josh and Guangliang's patches fix setting and unsetting RBD images read-only.

  Naturally there are several other cleanups mixed in for good measure"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client: (23 commits)
  rbd: only set disk to read-only once
  rbd: move calls that may sleep out of spin lock range
  rbd: add ioctl for rbd
  ceph: use truncate_pagecache() instead of truncate_inode_pages()
  ceph: include time stamp in every MDS request
  rbd: fix ida/idr memory leak
  rbd: use reference counts for image requests
  rbd: fix osd_request memory leak in __rbd_dev_header_watch_sync()
  rbd: make sure we have latest osdmap on 'rbd map'
  libceph: add ceph_monc_wait_osdmap()
  libceph: mon_get_version request infrastructure
  libceph: recognize poolop requests in debugfs
  ceph: refactor readpage_nounlock() to make the logic clearer
  mds: check cap ID when handling cap export message
  ceph: remember subtree root dirfrag's auth MDS
  ceph: introduce ceph_fill_fragtree()
  ceph: handle cap import atomically
  ceph: pre-allocate ceph_cap struct for ceph_add_cap()
  ceph: update inode fields according to issued caps
  rbd: replace IS_ERR and PTR_ERR with PTR_ERR_OR_ZERO
  ...
Diffstat (limited to 'fs/ceph')
-rw-r--r-- fs/ceph/acl.c 6
-rw-r--r-- fs/ceph/addr.c 17
-rw-r--r-- fs/ceph/caps.c 246
-rw-r--r-- fs/ceph/export.c 2
-rw-r--r-- fs/ceph/inode.c 247
-rw-r--r-- fs/ceph/mds_client.c 9
-rw-r--r-- fs/ceph/mds_client.h 1
-rw-r--r-- fs/ceph/super.h 13
8 files changed, 310 insertions, 231 deletions
diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c
index 21887d63dad5..469f2e8657e8 100644
--- a/fs/ceph/acl.c
+++ b/fs/ceph/acl.c
@@ -104,12 +104,6 @@ int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type)
104 umode_t new_mode = inode->i_mode, old_mode = inode->i_mode; 104 umode_t new_mode = inode->i_mode, old_mode = inode->i_mode;
105 struct dentry *dentry; 105 struct dentry *dentry;
106 106
107 if (acl) {
108 ret = posix_acl_valid(acl);
109 if (ret < 0)
110 goto out;
111 }
112
113 switch (type) { 107 switch (type) {
114 case ACL_TYPE_ACCESS: 108 case ACL_TYPE_ACCESS:
115 name = POSIX_ACL_XATTR_ACCESS; 109 name = POSIX_ACL_XATTR_ACCESS;
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 4f3f69079f36..90b3954d48ed 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -211,18 +211,15 @@ static int readpage_nounlock(struct file *filp, struct page *page)
211 SetPageError(page); 211 SetPageError(page);
212 ceph_fscache_readpage_cancel(inode, page); 212 ceph_fscache_readpage_cancel(inode, page);
213 goto out; 213 goto out;
214 } else {
215 if (err < PAGE_CACHE_SIZE) {
216 /* zero fill remainder of page */
217 zero_user_segment(page, err, PAGE_CACHE_SIZE);
218 } else {
219 flush_dcache_page(page);
220 }
221 } 214 }
222 SetPageUptodate(page); 215 if (err < PAGE_CACHE_SIZE)
216 /* zero fill remainder of page */
217 zero_user_segment(page, err, PAGE_CACHE_SIZE);
218 else
219 flush_dcache_page(page);
223 220
224 if (err >= 0) 221 SetPageUptodate(page);
225 ceph_readpage_to_fscache(inode, page); 222 ceph_readpage_to_fscache(inode, page);
226 223
227out: 224out:
228 return err < 0 ? err : 0; 225 return err < 0 ? err : 0;
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index c561b628ebce..1fde164b74b5 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -221,8 +221,8 @@ int ceph_unreserve_caps(struct ceph_mds_client *mdsc,
221 return 0; 221 return 0;
222} 222}
223 223
224static struct ceph_cap *get_cap(struct ceph_mds_client *mdsc, 224struct ceph_cap *ceph_get_cap(struct ceph_mds_client *mdsc,
225 struct ceph_cap_reservation *ctx) 225 struct ceph_cap_reservation *ctx)
226{ 226{
227 struct ceph_cap *cap = NULL; 227 struct ceph_cap *cap = NULL;
228 228
@@ -508,15 +508,14 @@ static void __check_cap_issue(struct ceph_inode_info *ci, struct ceph_cap *cap,
508 * it is < 0. (This is so we can atomically add the cap and add an 508 * it is < 0. (This is so we can atomically add the cap and add an
509 * open file reference to it.) 509 * open file reference to it.)
510 */ 510 */
511int ceph_add_cap(struct inode *inode, 511void ceph_add_cap(struct inode *inode,
512 struct ceph_mds_session *session, u64 cap_id, 512 struct ceph_mds_session *session, u64 cap_id,
513 int fmode, unsigned issued, unsigned wanted, 513 int fmode, unsigned issued, unsigned wanted,
514 unsigned seq, unsigned mseq, u64 realmino, int flags, 514 unsigned seq, unsigned mseq, u64 realmino, int flags,
515 struct ceph_cap_reservation *caps_reservation) 515 struct ceph_cap **new_cap)
516{ 516{
517 struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; 517 struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
518 struct ceph_inode_info *ci = ceph_inode(inode); 518 struct ceph_inode_info *ci = ceph_inode(inode);
519 struct ceph_cap *new_cap = NULL;
520 struct ceph_cap *cap; 519 struct ceph_cap *cap;
521 int mds = session->s_mds; 520 int mds = session->s_mds;
522 int actual_wanted; 521 int actual_wanted;
@@ -531,20 +530,10 @@ int ceph_add_cap(struct inode *inode,
531 if (fmode >= 0) 530 if (fmode >= 0)
532 wanted |= ceph_caps_for_mode(fmode); 531 wanted |= ceph_caps_for_mode(fmode);
533 532
534retry:
535 spin_lock(&ci->i_ceph_lock);
536 cap = __get_cap_for_mds(ci, mds); 533 cap = __get_cap_for_mds(ci, mds);
537 if (!cap) { 534 if (!cap) {
538 if (new_cap) { 535 cap = *new_cap;
539 cap = new_cap; 536 *new_cap = NULL;
540 new_cap = NULL;
541 } else {
542 spin_unlock(&ci->i_ceph_lock);
543 new_cap = get_cap(mdsc, caps_reservation);
544 if (new_cap == NULL)
545 return -ENOMEM;
546 goto retry;
547 }
548 537
549 cap->issued = 0; 538 cap->issued = 0;
550 cap->implemented = 0; 539 cap->implemented = 0;
@@ -562,9 +551,6 @@ retry:
562 session->s_nr_caps++; 551 session->s_nr_caps++;
563 spin_unlock(&session->s_cap_lock); 552 spin_unlock(&session->s_cap_lock);
564 } else { 553 } else {
565 if (new_cap)
566 ceph_put_cap(mdsc, new_cap);
567
568 /* 554 /*
569 * auth mds of the inode changed. we received the cap export 555 * auth mds of the inode changed. we received the cap export
570 * message, but still haven't received the cap import message. 556 * message, but still haven't received the cap import message.
@@ -626,7 +612,6 @@ retry:
626 ci->i_auth_cap = cap; 612 ci->i_auth_cap = cap;
627 cap->mds_wanted = wanted; 613 cap->mds_wanted = wanted;
628 } 614 }
629 ci->i_cap_exporting_issued = 0;
630 } else { 615 } else {
631 WARN_ON(ci->i_auth_cap == cap); 616 WARN_ON(ci->i_auth_cap == cap);
632 } 617 }
@@ -648,9 +633,6 @@ retry:
648 633
649 if (fmode >= 0) 634 if (fmode >= 0)
650 __ceph_get_fmode(ci, fmode); 635 __ceph_get_fmode(ci, fmode);
651 spin_unlock(&ci->i_ceph_lock);
652 wake_up_all(&ci->i_cap_wq);
653 return 0;
654} 636}
655 637
656/* 638/*
@@ -685,7 +667,7 @@ static int __cap_is_valid(struct ceph_cap *cap)
685 */ 667 */
686int __ceph_caps_issued(struct ceph_inode_info *ci, int *implemented) 668int __ceph_caps_issued(struct ceph_inode_info *ci, int *implemented)
687{ 669{
688 int have = ci->i_snap_caps | ci->i_cap_exporting_issued; 670 int have = ci->i_snap_caps;
689 struct ceph_cap *cap; 671 struct ceph_cap *cap;
690 struct rb_node *p; 672 struct rb_node *p;
691 673
@@ -900,7 +882,7 @@ int __ceph_caps_mds_wanted(struct ceph_inode_info *ci)
900 */ 882 */
901static int __ceph_is_any_caps(struct ceph_inode_info *ci) 883static int __ceph_is_any_caps(struct ceph_inode_info *ci)
902{ 884{
903 return !RB_EMPTY_ROOT(&ci->i_caps) || ci->i_cap_exporting_issued; 885 return !RB_EMPTY_ROOT(&ci->i_caps);
904} 886}
905 887
906int ceph_is_any_caps(struct inode *inode) 888int ceph_is_any_caps(struct inode *inode)
@@ -2397,32 +2379,30 @@ static void invalidate_aliases(struct inode *inode)
2397 * actually be a revocation if it specifies a smaller cap set.) 2379 * actually be a revocation if it specifies a smaller cap set.)
2398 * 2380 *
2399 * caller holds s_mutex and i_ceph_lock, we drop both. 2381 * caller holds s_mutex and i_ceph_lock, we drop both.
2400 *
2401 * return value:
2402 * 0 - ok
2403 * 1 - check_caps on auth cap only (writeback)
2404 * 2 - check_caps (ack revoke)
2405 */ 2382 */
2406static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, 2383static void handle_cap_grant(struct ceph_mds_client *mdsc,
2384 struct inode *inode, struct ceph_mds_caps *grant,
2385 void *snaptrace, int snaptrace_len,
2386 struct ceph_buffer *xattr_buf,
2407 struct ceph_mds_session *session, 2387 struct ceph_mds_session *session,
2408 struct ceph_cap *cap, 2388 struct ceph_cap *cap, int issued)
2409 struct ceph_buffer *xattr_buf) 2389 __releases(ci->i_ceph_lock)
2410 __releases(ci->i_ceph_lock)
2411{ 2390{
2412 struct ceph_inode_info *ci = ceph_inode(inode); 2391 struct ceph_inode_info *ci = ceph_inode(inode);
2413 int mds = session->s_mds; 2392 int mds = session->s_mds;
2414 int seq = le32_to_cpu(grant->seq); 2393 int seq = le32_to_cpu(grant->seq);
2415 int newcaps = le32_to_cpu(grant->caps); 2394 int newcaps = le32_to_cpu(grant->caps);
2416 int issued, implemented, used, wanted, dirty; 2395 int used, wanted, dirty;
2417 u64 size = le64_to_cpu(grant->size); 2396 u64 size = le64_to_cpu(grant->size);
2418 u64 max_size = le64_to_cpu(grant->max_size); 2397 u64 max_size = le64_to_cpu(grant->max_size);
2419 struct timespec mtime, atime, ctime; 2398 struct timespec mtime, atime, ctime;
2420 int check_caps = 0; 2399 int check_caps = 0;
2421 int wake = 0; 2400 bool wake = 0;
2422 int writeback = 0; 2401 bool writeback = 0;
2423 int queue_invalidate = 0; 2402 bool queue_trunc = 0;
2424 int deleted_inode = 0; 2403 bool queue_invalidate = 0;
2425 int queue_revalidate = 0; 2404 bool queue_revalidate = 0;
2405 bool deleted_inode = 0;
2426 2406
2427 dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n", 2407 dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n",
2428 inode, cap, mds, seq, ceph_cap_string(newcaps)); 2408 inode, cap, mds, seq, ceph_cap_string(newcaps));
@@ -2466,16 +2446,13 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
2466 } 2446 }
2467 2447
2468 /* side effects now are allowed */ 2448 /* side effects now are allowed */
2469
2470 issued = __ceph_caps_issued(ci, &implemented);
2471 issued |= implemented | __ceph_caps_dirty(ci);
2472
2473 cap->cap_gen = session->s_cap_gen; 2449 cap->cap_gen = session->s_cap_gen;
2474 cap->seq = seq; 2450 cap->seq = seq;
2475 2451
2476 __check_cap_issue(ci, cap, newcaps); 2452 __check_cap_issue(ci, cap, newcaps);
2477 2453
2478 if ((issued & CEPH_CAP_AUTH_EXCL) == 0) { 2454 if ((newcaps & CEPH_CAP_AUTH_SHARED) &&
2455 (issued & CEPH_CAP_AUTH_EXCL) == 0) {
2479 inode->i_mode = le32_to_cpu(grant->mode); 2456 inode->i_mode = le32_to_cpu(grant->mode);
2480 inode->i_uid = make_kuid(&init_user_ns, le32_to_cpu(grant->uid)); 2457 inode->i_uid = make_kuid(&init_user_ns, le32_to_cpu(grant->uid));
2481 inode->i_gid = make_kgid(&init_user_ns, le32_to_cpu(grant->gid)); 2458 inode->i_gid = make_kgid(&init_user_ns, le32_to_cpu(grant->gid));
@@ -2484,7 +2461,8 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
2484 from_kgid(&init_user_ns, inode->i_gid)); 2461 from_kgid(&init_user_ns, inode->i_gid));
2485 } 2462 }
2486 2463
2487 if ((issued & CEPH_CAP_LINK_EXCL) == 0) { 2464 if ((newcaps & CEPH_CAP_AUTH_SHARED) &&
2465 (issued & CEPH_CAP_LINK_EXCL) == 0) {
2488 set_nlink(inode, le32_to_cpu(grant->nlink)); 2466 set_nlink(inode, le32_to_cpu(grant->nlink));
2489 if (inode->i_nlink == 0 && 2467 if (inode->i_nlink == 0 &&
2490 (newcaps & (CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL))) 2468 (newcaps & (CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL)))
@@ -2511,30 +2489,35 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
2511 if ((issued & CEPH_CAP_FILE_CACHE) && ci->i_rdcache_gen > 1) 2489 if ((issued & CEPH_CAP_FILE_CACHE) && ci->i_rdcache_gen > 1)
2512 queue_revalidate = 1; 2490 queue_revalidate = 1;
2513 2491
2514 /* size/ctime/mtime/atime? */ 2492 if (newcaps & CEPH_CAP_ANY_RD) {
2515 ceph_fill_file_size(inode, issued, 2493 /* ctime/mtime/atime? */
2516 le32_to_cpu(grant->truncate_seq), 2494 ceph_decode_timespec(&mtime, &grant->mtime);
2517 le64_to_cpu(grant->truncate_size), size); 2495 ceph_decode_timespec(&atime, &grant->atime);
2518 ceph_decode_timespec(&mtime, &grant->mtime); 2496 ceph_decode_timespec(&ctime, &grant->ctime);
2519 ceph_decode_timespec(&atime, &grant->atime); 2497 ceph_fill_file_time(inode, issued,
2520 ceph_decode_timespec(&ctime, &grant->ctime); 2498 le32_to_cpu(grant->time_warp_seq),
2521 ceph_fill_file_time(inode, issued, 2499 &ctime, &mtime, &atime);
2522 le32_to_cpu(grant->time_warp_seq), &ctime, &mtime, 2500 }
2523 &atime); 2501
2524 2502 if (newcaps & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR)) {
2525 2503 /* file layout may have changed */
2526 /* file layout may have changed */ 2504 ci->i_layout = grant->layout;
2527 ci->i_layout = grant->layout; 2505 /* size/truncate_seq? */
2528 2506 queue_trunc = ceph_fill_file_size(inode, issued,
2529 /* max size increase? */ 2507 le32_to_cpu(grant->truncate_seq),
2530 if (ci->i_auth_cap == cap && max_size != ci->i_max_size) { 2508 le64_to_cpu(grant->truncate_size),
2531 dout("max_size %lld -> %llu\n", ci->i_max_size, max_size); 2509 size);
2532 ci->i_max_size = max_size; 2510 /* max size increase? */
2533 if (max_size >= ci->i_wanted_max_size) { 2511 if (ci->i_auth_cap == cap && max_size != ci->i_max_size) {
2534 ci->i_wanted_max_size = 0; /* reset */ 2512 dout("max_size %lld -> %llu\n",
2535 ci->i_requested_max_size = 0; 2513 ci->i_max_size, max_size);
2514 ci->i_max_size = max_size;
2515 if (max_size >= ci->i_wanted_max_size) {
2516 ci->i_wanted_max_size = 0; /* reset */
2517 ci->i_requested_max_size = 0;
2518 }
2519 wake = 1;
2536 } 2520 }
2537 wake = 1;
2538 } 2521 }
2539 2522
2540 /* check cap bits */ 2523 /* check cap bits */
@@ -2595,6 +2578,23 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
2595 2578
2596 spin_unlock(&ci->i_ceph_lock); 2579 spin_unlock(&ci->i_ceph_lock);
2597 2580
2581 if (le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT) {
2582 down_write(&mdsc->snap_rwsem);
2583 ceph_update_snap_trace(mdsc, snaptrace,
2584 snaptrace + snaptrace_len, false);
2585 downgrade_write(&mdsc->snap_rwsem);
2586 kick_flushing_inode_caps(mdsc, session, inode);
2587 up_read(&mdsc->snap_rwsem);
2588 if (newcaps & ~issued)
2589 wake = 1;
2590 }
2591
2592 if (queue_trunc) {
2593 ceph_queue_vmtruncate(inode);
2594 ceph_queue_revalidate(inode);
2595 } else if (queue_revalidate)
2596 ceph_queue_revalidate(inode);
2597
2598 if (writeback) 2598 if (writeback)
2599 /* 2599 /*
2600 * queue inode for writeback: we can't actually call 2600 * queue inode for writeback: we can't actually call
@@ -2606,8 +2606,6 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
2606 ceph_queue_invalidate(inode); 2606 ceph_queue_invalidate(inode);
2607 if (deleted_inode) 2607 if (deleted_inode)
2608 invalidate_aliases(inode); 2608 invalidate_aliases(inode);
2609 if (queue_revalidate)
2610 ceph_queue_revalidate(inode);
2611 if (wake) 2609 if (wake)
2612 wake_up_all(&ci->i_cap_wq); 2610 wake_up_all(&ci->i_cap_wq);
2613 2611
@@ -2784,7 +2782,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
2784{ 2782{
2785 struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; 2783 struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
2786 struct ceph_mds_session *tsession = NULL; 2784 struct ceph_mds_session *tsession = NULL;
2787 struct ceph_cap *cap, *tcap; 2785 struct ceph_cap *cap, *tcap, *new_cap = NULL;
2788 struct ceph_inode_info *ci = ceph_inode(inode); 2786 struct ceph_inode_info *ci = ceph_inode(inode);
2789 u64 t_cap_id; 2787 u64 t_cap_id;
2790 unsigned mseq = le32_to_cpu(ex->migrate_seq); 2788 unsigned mseq = le32_to_cpu(ex->migrate_seq);
@@ -2807,7 +2805,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
2807retry: 2805retry:
2808 spin_lock(&ci->i_ceph_lock); 2806 spin_lock(&ci->i_ceph_lock);
2809 cap = __get_cap_for_mds(ci, mds); 2807 cap = __get_cap_for_mds(ci, mds);
2810 if (!cap) 2808 if (!cap || cap->cap_id != le64_to_cpu(ex->cap_id))
2811 goto out_unlock; 2809 goto out_unlock;
2812 2810
2813 if (target < 0) { 2811 if (target < 0) {
@@ -2846,15 +2844,14 @@ retry:
2846 } 2844 }
2847 __ceph_remove_cap(cap, false); 2845 __ceph_remove_cap(cap, false);
2848 goto out_unlock; 2846 goto out_unlock;
2849 } 2847 } else if (tsession) {
2850
2851 if (tsession) {
2852 int flag = (cap == ci->i_auth_cap) ? CEPH_CAP_FLAG_AUTH : 0;
2853 spin_unlock(&ci->i_ceph_lock);
2854 /* add placeholder for the export tagert */ 2848 /* add placeholder for the export tagert */
2849 int flag = (cap == ci->i_auth_cap) ? CEPH_CAP_FLAG_AUTH : 0;
2855 ceph_add_cap(inode, tsession, t_cap_id, -1, issued, 0, 2850 ceph_add_cap(inode, tsession, t_cap_id, -1, issued, 0,
2856 t_seq - 1, t_mseq, (u64)-1, flag, NULL); 2851 t_seq - 1, t_mseq, (u64)-1, flag, &new_cap);
2857 goto retry; 2852
2853 __ceph_remove_cap(cap, false);
2854 goto out_unlock;
2858 } 2855 }
2859 2856
2860 spin_unlock(&ci->i_ceph_lock); 2857 spin_unlock(&ci->i_ceph_lock);
@@ -2873,6 +2870,7 @@ retry:
2873 SINGLE_DEPTH_NESTING); 2870 SINGLE_DEPTH_NESTING);
2874 } 2871 }
2875 ceph_add_cap_releases(mdsc, tsession); 2872 ceph_add_cap_releases(mdsc, tsession);
2873 new_cap = ceph_get_cap(mdsc, NULL);
2876 } else { 2874 } else {
2877 WARN_ON(1); 2875 WARN_ON(1);
2878 tsession = NULL; 2876 tsession = NULL;
@@ -2887,24 +2885,27 @@ out_unlock:
2887 mutex_unlock(&tsession->s_mutex); 2885 mutex_unlock(&tsession->s_mutex);
2888 ceph_put_mds_session(tsession); 2886 ceph_put_mds_session(tsession);
2889 } 2887 }
2888 if (new_cap)
2889 ceph_put_cap(mdsc, new_cap);
2890} 2890}
2891 2891
2892/* 2892/*
2893 * Handle cap IMPORT. If there are temp bits from an older EXPORT, 2893 * Handle cap IMPORT.
2894 * clean them up.
2895 * 2894 *
2896 * caller holds s_mutex. 2895 * caller holds s_mutex. acquires i_ceph_lock
2897 */ 2896 */
2898static void handle_cap_import(struct ceph_mds_client *mdsc, 2897static void handle_cap_import(struct ceph_mds_client *mdsc,
2899 struct inode *inode, struct ceph_mds_caps *im, 2898 struct inode *inode, struct ceph_mds_caps *im,
2900 struct ceph_mds_cap_peer *ph, 2899 struct ceph_mds_cap_peer *ph,
2901 struct ceph_mds_session *session, 2900 struct ceph_mds_session *session,
2902 void *snaptrace, int snaptrace_len) 2901 struct ceph_cap **target_cap, int *old_issued)
2902 __acquires(ci->i_ceph_lock)
2903{ 2903{
2904 struct ceph_inode_info *ci = ceph_inode(inode); 2904 struct ceph_inode_info *ci = ceph_inode(inode);
2905 struct ceph_cap *cap; 2905 struct ceph_cap *cap, *ocap, *new_cap = NULL;
2906 int mds = session->s_mds; 2906 int mds = session->s_mds;
2907 unsigned issued = le32_to_cpu(im->caps); 2907 int issued;
2908 unsigned caps = le32_to_cpu(im->caps);
2908 unsigned wanted = le32_to_cpu(im->wanted); 2909 unsigned wanted = le32_to_cpu(im->wanted);
2909 unsigned seq = le32_to_cpu(im->seq); 2910 unsigned seq = le32_to_cpu(im->seq);
2910 unsigned mseq = le32_to_cpu(im->migrate_seq); 2911 unsigned mseq = le32_to_cpu(im->migrate_seq);
@@ -2924,40 +2925,52 @@ static void handle_cap_import(struct ceph_mds_client *mdsc,
2924 dout("handle_cap_import inode %p ci %p mds%d mseq %d peer %d\n", 2925 dout("handle_cap_import inode %p ci %p mds%d mseq %d peer %d\n",
2925 inode, ci, mds, mseq, peer); 2926 inode, ci, mds, mseq, peer);
2926 2927
2928retry:
2927 spin_lock(&ci->i_ceph_lock); 2929 spin_lock(&ci->i_ceph_lock);
2928 cap = peer >= 0 ? __get_cap_for_mds(ci, peer) : NULL; 2930 cap = __get_cap_for_mds(ci, mds);
2929 if (cap && cap->cap_id == p_cap_id) { 2931 if (!cap) {
2932 if (!new_cap) {
2933 spin_unlock(&ci->i_ceph_lock);
2934 new_cap = ceph_get_cap(mdsc, NULL);
2935 goto retry;
2936 }
2937 cap = new_cap;
2938 } else {
2939 if (new_cap) {
2940 ceph_put_cap(mdsc, new_cap);
2941 new_cap = NULL;
2942 }
2943 }
2944
2945 __ceph_caps_issued(ci, &issued);
2946 issued |= __ceph_caps_dirty(ci);
2947
2948 ceph_add_cap(inode, session, cap_id, -1, caps, wanted, seq, mseq,
2949 realmino, CEPH_CAP_FLAG_AUTH, &new_cap);
2950
2951 ocap = peer >= 0 ? __get_cap_for_mds(ci, peer) : NULL;
2952 if (ocap && ocap->cap_id == p_cap_id) {
2930 dout(" remove export cap %p mds%d flags %d\n", 2953 dout(" remove export cap %p mds%d flags %d\n",
2931 cap, peer, ph->flags); 2954 ocap, peer, ph->flags);
2932 if ((ph->flags & CEPH_CAP_FLAG_AUTH) && 2955 if ((ph->flags & CEPH_CAP_FLAG_AUTH) &&
2933 (cap->seq != le32_to_cpu(ph->seq) || 2956 (ocap->seq != le32_to_cpu(ph->seq) ||
2934 cap->mseq != le32_to_cpu(ph->mseq))) { 2957 ocap->mseq != le32_to_cpu(ph->mseq))) {
2935 pr_err("handle_cap_import: mismatched seq/mseq: " 2958 pr_err("handle_cap_import: mismatched seq/mseq: "
2936 "ino (%llx.%llx) mds%d seq %d mseq %d " 2959 "ino (%llx.%llx) mds%d seq %d mseq %d "
2937 "importer mds%d has peer seq %d mseq %d\n", 2960 "importer mds%d has peer seq %d mseq %d\n",
2938 ceph_vinop(inode), peer, cap->seq, 2961 ceph_vinop(inode), peer, ocap->seq,
2939 cap->mseq, mds, le32_to_cpu(ph->seq), 2962 ocap->mseq, mds, le32_to_cpu(ph->seq),
2940 le32_to_cpu(ph->mseq)); 2963 le32_to_cpu(ph->mseq));
2941 } 2964 }
2942 ci->i_cap_exporting_issued = cap->issued; 2965 __ceph_remove_cap(ocap, (ph->flags & CEPH_CAP_FLAG_RELEASE));
2943 __ceph_remove_cap(cap, (ph->flags & CEPH_CAP_FLAG_RELEASE));
2944 } 2966 }
2945 2967
2946 /* make sure we re-request max_size, if necessary */ 2968 /* make sure we re-request max_size, if necessary */
2947 ci->i_wanted_max_size = 0; 2969 ci->i_wanted_max_size = 0;
2948 ci->i_requested_max_size = 0; 2970 ci->i_requested_max_size = 0;
2949 spin_unlock(&ci->i_ceph_lock);
2950
2951 down_write(&mdsc->snap_rwsem);
2952 ceph_update_snap_trace(mdsc, snaptrace, snaptrace+snaptrace_len,
2953 false);
2954 downgrade_write(&mdsc->snap_rwsem);
2955 ceph_add_cap(inode, session, cap_id, -1,
2956 issued, wanted, seq, mseq, realmino, CEPH_CAP_FLAG_AUTH,
2957 NULL /* no caps context */);
2958 kick_flushing_inode_caps(mdsc, session, inode);
2959 up_read(&mdsc->snap_rwsem);
2960 2971
2972 *old_issued = issued;
2973 *target_cap = cap;
2961} 2974}
2962 2975
2963/* 2976/*
@@ -2977,7 +2990,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
2977 struct ceph_mds_caps *h; 2990 struct ceph_mds_caps *h;
2978 struct ceph_mds_cap_peer *peer = NULL; 2991 struct ceph_mds_cap_peer *peer = NULL;
2979 int mds = session->s_mds; 2992 int mds = session->s_mds;
2980 int op; 2993 int op, issued;
2981 u32 seq, mseq; 2994 u32 seq, mseq;
2982 struct ceph_vino vino; 2995 struct ceph_vino vino;
2983 u64 cap_id; 2996 u64 cap_id;
@@ -3069,7 +3082,10 @@ void ceph_handle_caps(struct ceph_mds_session *session,
3069 3082
3070 case CEPH_CAP_OP_IMPORT: 3083 case CEPH_CAP_OP_IMPORT:
3071 handle_cap_import(mdsc, inode, h, peer, session, 3084 handle_cap_import(mdsc, inode, h, peer, session,
3072 snaptrace, snaptrace_len); 3085 &cap, &issued);
3086 handle_cap_grant(mdsc, inode, h, snaptrace, snaptrace_len,
3087 msg->middle, session, cap, issued);
3088 goto done_unlocked;
3073 } 3089 }
3074 3090
3075 /* the rest require a cap */ 3091 /* the rest require a cap */
@@ -3086,8 +3102,10 @@ void ceph_handle_caps(struct ceph_mds_session *session,
3086 switch (op) { 3102 switch (op) {
3087 case CEPH_CAP_OP_REVOKE: 3103 case CEPH_CAP_OP_REVOKE:
3088 case CEPH_CAP_OP_GRANT: 3104 case CEPH_CAP_OP_GRANT:
3089 case CEPH_CAP_OP_IMPORT: 3105 __ceph_caps_issued(ci, &issued);
3090 handle_cap_grant(inode, h, session, cap, msg->middle); 3106 issued |= __ceph_caps_dirty(ci);
3107 handle_cap_grant(mdsc, inode, h, NULL, 0, msg->middle,
3108 session, cap, issued);
3091 goto done_unlocked; 3109 goto done_unlocked;
3092 3110
3093 case CEPH_CAP_OP_FLUSH_ACK: 3111 case CEPH_CAP_OP_FLUSH_ACK:
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index 00d6af6a32ec..8d7d782f4382 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -169,7 +169,7 @@ static struct dentry *__get_parent(struct super_block *sb,
169 return dentry; 169 return dentry;
170} 170}
171 171
172struct dentry *ceph_get_parent(struct dentry *child) 172static struct dentry *ceph_get_parent(struct dentry *child)
173{ 173{
174 /* don't re-export snaps */ 174 /* don't re-export snaps */
175 if (ceph_snap(child->d_inode) != CEPH_NOSNAP) 175 if (ceph_snap(child->d_inode) != CEPH_NOSNAP)
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index e4fff9ff1c27..04c89c266cec 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -10,6 +10,7 @@
10#include <linux/writeback.h> 10#include <linux/writeback.h>
11#include <linux/vmalloc.h> 11#include <linux/vmalloc.h>
12#include <linux/posix_acl.h> 12#include <linux/posix_acl.h>
13#include <linux/random.h>
13 14
14#include "super.h" 15#include "super.h"
15#include "mds_client.h" 16#include "mds_client.h"
@@ -179,9 +180,8 @@ struct ceph_inode_frag *__ceph_find_frag(struct ceph_inode_info *ci, u32 f)
179 * specified, copy the frag delegation info to the caller if 180 * specified, copy the frag delegation info to the caller if
180 * it is present. 181 * it is present.
181 */ 182 */
182u32 ceph_choose_frag(struct ceph_inode_info *ci, u32 v, 183static u32 __ceph_choose_frag(struct ceph_inode_info *ci, u32 v,
183 struct ceph_inode_frag *pfrag, 184 struct ceph_inode_frag *pfrag, int *found)
184 int *found)
185{ 185{
186 u32 t = ceph_frag_make(0, 0); 186 u32 t = ceph_frag_make(0, 0);
187 struct ceph_inode_frag *frag; 187 struct ceph_inode_frag *frag;
@@ -191,7 +191,6 @@ u32 ceph_choose_frag(struct ceph_inode_info *ci, u32 v,
191 if (found) 191 if (found)
192 *found = 0; 192 *found = 0;
193 193
194 mutex_lock(&ci->i_fragtree_mutex);
195 while (1) { 194 while (1) {
196 WARN_ON(!ceph_frag_contains_value(t, v)); 195 WARN_ON(!ceph_frag_contains_value(t, v));
197 frag = __ceph_find_frag(ci, t); 196 frag = __ceph_find_frag(ci, t);
@@ -220,10 +219,19 @@ u32 ceph_choose_frag(struct ceph_inode_info *ci, u32 v,
220 } 219 }
221 dout("choose_frag(%x) = %x\n", v, t); 220 dout("choose_frag(%x) = %x\n", v, t);
222 221
223 mutex_unlock(&ci->i_fragtree_mutex);
224 return t; 222 return t;
225} 223}
226 224
225u32 ceph_choose_frag(struct ceph_inode_info *ci, u32 v,
226 struct ceph_inode_frag *pfrag, int *found)
227{
228 u32 ret;
229 mutex_lock(&ci->i_fragtree_mutex);
230 ret = __ceph_choose_frag(ci, v, pfrag, found);
231 mutex_unlock(&ci->i_fragtree_mutex);
232 return ret;
233}
234
227/* 235/*
228 * Process dirfrag (delegation) info from the mds. Include leaf 236 * Process dirfrag (delegation) info from the mds. Include leaf
229 * fragment in tree ONLY if ndist > 0. Otherwise, only 237 * fragment in tree ONLY if ndist > 0. Otherwise, only
@@ -237,11 +245,17 @@ static int ceph_fill_dirfrag(struct inode *inode,
237 u32 id = le32_to_cpu(dirinfo->frag); 245 u32 id = le32_to_cpu(dirinfo->frag);
238 int mds = le32_to_cpu(dirinfo->auth); 246 int mds = le32_to_cpu(dirinfo->auth);
239 int ndist = le32_to_cpu(dirinfo->ndist); 247 int ndist = le32_to_cpu(dirinfo->ndist);
248 int diri_auth = -1;
240 int i; 249 int i;
241 int err = 0; 250 int err = 0;
242 251
252 spin_lock(&ci->i_ceph_lock);
253 if (ci->i_auth_cap)
254 diri_auth = ci->i_auth_cap->mds;
255 spin_unlock(&ci->i_ceph_lock);
256
243 mutex_lock(&ci->i_fragtree_mutex); 257 mutex_lock(&ci->i_fragtree_mutex);
244 if (ndist == 0) { 258 if (ndist == 0 && mds == diri_auth) {
245 /* no delegation info needed. */ 259 /* no delegation info needed. */
246 frag = __ceph_find_frag(ci, id); 260 frag = __ceph_find_frag(ci, id);
247 if (!frag) 261 if (!frag)
@@ -286,6 +300,75 @@ out:
286 return err; 300 return err;
287} 301}
288 302
303static int ceph_fill_fragtree(struct inode *inode,
304 struct ceph_frag_tree_head *fragtree,
305 struct ceph_mds_reply_dirfrag *dirinfo)
306{
307 struct ceph_inode_info *ci = ceph_inode(inode);
308 struct ceph_inode_frag *frag;
309 struct rb_node *rb_node;
310 int i;
311 u32 id, nsplits;
312 bool update = false;
313
314 mutex_lock(&ci->i_fragtree_mutex);
315 nsplits = le32_to_cpu(fragtree->nsplits);
316 if (nsplits) {
317 i = prandom_u32() % nsplits;
318 id = le32_to_cpu(fragtree->splits[i].frag);
319 if (!__ceph_find_frag(ci, id))
320 update = true;
321 } else if (!RB_EMPTY_ROOT(&ci->i_fragtree)) {
322 rb_node = rb_first(&ci->i_fragtree);
323 frag = rb_entry(rb_node, struct ceph_inode_frag, node);
324 if (frag->frag != ceph_frag_make(0, 0) || rb_next(rb_node))
325 update = true;
326 }
327 if (!update && dirinfo) {
328 id = le32_to_cpu(dirinfo->frag);
329 if (id != __ceph_choose_frag(ci, id, NULL, NULL))
330 update = true;
331 }
332 if (!update)
333 goto out_unlock;
334
335 dout("fill_fragtree %llx.%llx\n", ceph_vinop(inode));
336 rb_node = rb_first(&ci->i_fragtree);
337 for (i = 0; i < nsplits; i++) {
338 id = le32_to_cpu(fragtree->splits[i].frag);
339 frag = NULL;
340 while (rb_node) {
341 frag = rb_entry(rb_node, struct ceph_inode_frag, node);
342 if (ceph_frag_compare(frag->frag, id) >= 0) {
343 if (frag->frag != id)
344 frag = NULL;
345 else
346 rb_node = rb_next(rb_node);
347 break;
348 }
349 rb_node = rb_next(rb_node);
350 rb_erase(&frag->node, &ci->i_fragtree);
351 kfree(frag);
352 frag = NULL;
353 }
354 if (!frag) {
355 frag = __get_or_create_frag(ci, id);
356 if (IS_ERR(frag))
357 continue;
358 }
359 frag->split_by = le32_to_cpu(fragtree->splits[i].by);
360 dout(" frag %x split by %d\n", frag->frag, frag->split_by);
361 }
362 while (rb_node) {
363 frag = rb_entry(rb_node, struct ceph_inode_frag, node);
364 rb_node = rb_next(rb_node);
365 rb_erase(&frag->node, &ci->i_fragtree);
366 kfree(frag);
367 }
368out_unlock:
369 mutex_unlock(&ci->i_fragtree_mutex);
370 return 0;
371}
289 372
290/* 373/*
291 * initialize a newly allocated inode. 374 * initialize a newly allocated inode.
@@ -341,7 +424,6 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
341 INIT_LIST_HEAD(&ci->i_cap_snaps); 424 INIT_LIST_HEAD(&ci->i_cap_snaps);
342 ci->i_head_snapc = NULL; 425 ci->i_head_snapc = NULL;
343 ci->i_snap_caps = 0; 426 ci->i_snap_caps = 0;
344 ci->i_cap_exporting_issued = 0;
345 427
346 for (i = 0; i < CEPH_FILE_MODE_NUM; i++) 428 for (i = 0; i < CEPH_FILE_MODE_NUM; i++)
347 ci->i_nr_by_mode[i] = 0; 429 ci->i_nr_by_mode[i] = 0;
@@ -407,7 +489,7 @@ void ceph_destroy_inode(struct inode *inode)
407 489
408 /* 490 /*
409 * we may still have a snap_realm reference if there are stray 491 * we may still have a snap_realm reference if there are stray
410 * caps in i_cap_exporting_issued or i_snap_caps. 492 * caps in i_snap_caps.
411 */ 493 */
412 if (ci->i_snap_realm) { 494 if (ci->i_snap_realm) {
413 struct ceph_mds_client *mdsc = 495 struct ceph_mds_client *mdsc =
@@ -582,22 +664,26 @@ static int fill_inode(struct inode *inode,
582 unsigned long ttl_from, int cap_fmode, 664 unsigned long ttl_from, int cap_fmode,
583 struct ceph_cap_reservation *caps_reservation) 665 struct ceph_cap_reservation *caps_reservation)
584{ 666{
667 struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
585 struct ceph_mds_reply_inode *info = iinfo->in; 668 struct ceph_mds_reply_inode *info = iinfo->in;
586 struct ceph_inode_info *ci = ceph_inode(inode); 669 struct ceph_inode_info *ci = ceph_inode(inode);
587 int i; 670 int issued = 0, implemented, new_issued;
588 int issued = 0, implemented;
589 struct timespec mtime, atime, ctime; 671 struct timespec mtime, atime, ctime;
590 u32 nsplits;
591 struct ceph_inode_frag *frag;
592 struct rb_node *rb_node;
593 struct ceph_buffer *xattr_blob = NULL; 672 struct ceph_buffer *xattr_blob = NULL;
673 struct ceph_cap *new_cap = NULL;
594 int err = 0; 674 int err = 0;
595 int queue_trunc = 0; 675 bool wake = false;
676 bool queue_trunc = false;
677 bool new_version = false;
596 678
597 dout("fill_inode %p ino %llx.%llx v %llu had %llu\n", 679 dout("fill_inode %p ino %llx.%llx v %llu had %llu\n",
598 inode, ceph_vinop(inode), le64_to_cpu(info->version), 680 inode, ceph_vinop(inode), le64_to_cpu(info->version),
599 ci->i_version); 681 ci->i_version);
600 682
683 /* prealloc new cap struct */
684 if (info->cap.caps && ceph_snap(inode) == CEPH_NOSNAP)
685 new_cap = ceph_get_cap(mdsc, caps_reservation);
686
601 /* 687 /*
602 * prealloc xattr data, if it looks like we'll need it. only 688 * prealloc xattr data, if it looks like we'll need it. only
603 * if len > 4 (meaning there are actually xattrs; the first 4 689 * if len > 4 (meaning there are actually xattrs; the first 4
@@ -623,19 +709,23 @@ static int fill_inode(struct inode *inode,
623 * 3 2 skip 709 * 3 2 skip
624 * 3 3 update 710 * 3 3 update
625 */ 711 */
626 if (le64_to_cpu(info->version) > 0 && 712 if (ci->i_version == 0 ||
627 (ci->i_version & ~1) >= le64_to_cpu(info->version)) 713 ((info->cap.flags & CEPH_CAP_FLAG_AUTH) &&
628 goto no_change; 714 le64_to_cpu(info->version) > (ci->i_version & ~1)))
629 715 new_version = true;
716
630 issued = __ceph_caps_issued(ci, &implemented); 717 issued = __ceph_caps_issued(ci, &implemented);
631 issued |= implemented | __ceph_caps_dirty(ci); 718 issued |= implemented | __ceph_caps_dirty(ci);
719 new_issued = ~issued & le32_to_cpu(info->cap.caps);
632 720
633 /* update inode */ 721 /* update inode */
634 ci->i_version = le64_to_cpu(info->version); 722 ci->i_version = le64_to_cpu(info->version);
635 inode->i_version++; 723 inode->i_version++;
636 inode->i_rdev = le32_to_cpu(info->rdev); 724 inode->i_rdev = le32_to_cpu(info->rdev);
725 inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1;
637 726
638 if ((issued & CEPH_CAP_AUTH_EXCL) == 0) { 727 if ((new_version || (new_issued & CEPH_CAP_AUTH_SHARED)) &&
728 (issued & CEPH_CAP_AUTH_EXCL) == 0) {
639 inode->i_mode = le32_to_cpu(info->mode); 729 inode->i_mode = le32_to_cpu(info->mode);
640 inode->i_uid = make_kuid(&init_user_ns, le32_to_cpu(info->uid)); 730 inode->i_uid = make_kuid(&init_user_ns, le32_to_cpu(info->uid));
641 inode->i_gid = make_kgid(&init_user_ns, le32_to_cpu(info->gid)); 731 inode->i_gid = make_kgid(&init_user_ns, le32_to_cpu(info->gid));
@@ -644,23 +734,35 @@ static int fill_inode(struct inode *inode,
644 from_kgid(&init_user_ns, inode->i_gid)); 734 from_kgid(&init_user_ns, inode->i_gid));
645 } 735 }
646 736
647 if ((issued & CEPH_CAP_LINK_EXCL) == 0) 737 if ((new_version || (new_issued & CEPH_CAP_LINK_SHARED)) &&
738 (issued & CEPH_CAP_LINK_EXCL) == 0)
648 set_nlink(inode, le32_to_cpu(info->nlink)); 739 set_nlink(inode, le32_to_cpu(info->nlink));
649 740
650 /* be careful with mtime, atime, size */ 741 if (new_version || (new_issued & CEPH_CAP_ANY_RD)) {
651 ceph_decode_timespec(&atime, &info->atime); 742 /* be careful with mtime, atime, size */
652 ceph_decode_timespec(&mtime, &info->mtime); 743 ceph_decode_timespec(&atime, &info->atime);
653 ceph_decode_timespec(&ctime, &info->ctime); 744 ceph_decode_timespec(&mtime, &info->mtime);
654 queue_trunc = ceph_fill_file_size(inode, issued, 745 ceph_decode_timespec(&ctime, &info->ctime);
655 le32_to_cpu(info->truncate_seq), 746 ceph_fill_file_time(inode, issued,
656 le64_to_cpu(info->truncate_size), 747 le32_to_cpu(info->time_warp_seq),
657 le64_to_cpu(info->size)); 748 &ctime, &mtime, &atime);
658 ceph_fill_file_time(inode, issued, 749 }
659 le32_to_cpu(info->time_warp_seq), 750
660 &ctime, &mtime, &atime); 751 if (new_version ||
661 752 (new_issued & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR))) {
662 ci->i_layout = info->layout; 753 ci->i_layout = info->layout;
663 inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1; 754 queue_trunc = ceph_fill_file_size(inode, issued,
755 le32_to_cpu(info->truncate_seq),
756 le64_to_cpu(info->truncate_size),
757 le64_to_cpu(info->size));
758 /* only update max_size on auth cap */
759 if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) &&
760 ci->i_max_size != le64_to_cpu(info->max_size)) {
761 dout("max_size %lld -> %llu\n", ci->i_max_size,
762 le64_to_cpu(info->max_size));
763 ci->i_max_size = le64_to_cpu(info->max_size);
764 }
765 }
664 766
665 /* xattrs */ 767 /* xattrs */
666 /* note that if i_xattrs.len <= 4, i_xattrs.data will still be NULL. */ 768 /* note that if i_xattrs.len <= 4, i_xattrs.data will still be NULL. */
@@ -745,58 +847,6 @@ static int fill_inode(struct inode *inode,
745 dout(" marking %p complete (empty)\n", inode); 847 dout(" marking %p complete (empty)\n", inode);
746 __ceph_dir_set_complete(ci, atomic_read(&ci->i_release_count)); 848 __ceph_dir_set_complete(ci, atomic_read(&ci->i_release_count));
747 } 849 }
748no_change:
749 /* only update max_size on auth cap */
750 if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) &&
751 ci->i_max_size != le64_to_cpu(info->max_size)) {
752 dout("max_size %lld -> %llu\n", ci->i_max_size,
753 le64_to_cpu(info->max_size));
754 ci->i_max_size = le64_to_cpu(info->max_size);
755 }
756
757 spin_unlock(&ci->i_ceph_lock);
758
759 /* queue truncate if we saw i_size decrease */
760 if (queue_trunc)
761 ceph_queue_vmtruncate(inode);
762
763 /* populate frag tree */
764 /* FIXME: move me up, if/when version reflects fragtree changes */
765 nsplits = le32_to_cpu(info->fragtree.nsplits);
766 mutex_lock(&ci->i_fragtree_mutex);
767 rb_node = rb_first(&ci->i_fragtree);
768 for (i = 0; i < nsplits; i++) {
769 u32 id = le32_to_cpu(info->fragtree.splits[i].frag);
770 frag = NULL;
771 while (rb_node) {
772 frag = rb_entry(rb_node, struct ceph_inode_frag, node);
773 if (ceph_frag_compare(frag->frag, id) >= 0) {
774 if (frag->frag != id)
775 frag = NULL;
776 else
777 rb_node = rb_next(rb_node);
778 break;
779 }
780 rb_node = rb_next(rb_node);
781 rb_erase(&frag->node, &ci->i_fragtree);
782 kfree(frag);
783 frag = NULL;
784 }
785 if (!frag) {
786 frag = __get_or_create_frag(ci, id);
787 if (IS_ERR(frag))
788 continue;
789 }
790 frag->split_by = le32_to_cpu(info->fragtree.splits[i].by);
791 dout(" frag %x split by %d\n", frag->frag, frag->split_by);
792 }
793 while (rb_node) {
794 frag = rb_entry(rb_node, struct ceph_inode_frag, node);
795 rb_node = rb_next(rb_node);
796 rb_erase(&frag->node, &ci->i_fragtree);
797 kfree(frag);
798 }
799 mutex_unlock(&ci->i_fragtree_mutex);
800 850
801 /* were we issued a capability? */ 851 /* were we issued a capability? */
802 if (info->cap.caps) { 852 if (info->cap.caps) {
@@ -809,30 +859,41 @@ no_change:
809 le32_to_cpu(info->cap.seq), 859 le32_to_cpu(info->cap.seq),
810 le32_to_cpu(info->cap.mseq), 860 le32_to_cpu(info->cap.mseq),
811 le64_to_cpu(info->cap.realm), 861 le64_to_cpu(info->cap.realm),
812 info->cap.flags, 862 info->cap.flags, &new_cap);
813 caps_reservation); 863 wake = true;
814 } else { 864 } else {
815 spin_lock(&ci->i_ceph_lock);
816 dout(" %p got snap_caps %s\n", inode, 865 dout(" %p got snap_caps %s\n", inode,
817 ceph_cap_string(le32_to_cpu(info->cap.caps))); 866 ceph_cap_string(le32_to_cpu(info->cap.caps)));
818 ci->i_snap_caps |= le32_to_cpu(info->cap.caps); 867 ci->i_snap_caps |= le32_to_cpu(info->cap.caps);
819 if (cap_fmode >= 0) 868 if (cap_fmode >= 0)
820 __ceph_get_fmode(ci, cap_fmode); 869 __ceph_get_fmode(ci, cap_fmode);
821 spin_unlock(&ci->i_ceph_lock);
822 } 870 }
823 } else if (cap_fmode >= 0) { 871 } else if (cap_fmode >= 0) {
824 pr_warn("mds issued no caps on %llx.%llx\n", 872 pr_warn("mds issued no caps on %llx.%llx\n",
825 ceph_vinop(inode)); 873 ceph_vinop(inode));
826 __ceph_get_fmode(ci, cap_fmode); 874 __ceph_get_fmode(ci, cap_fmode);
827 } 875 }
876 spin_unlock(&ci->i_ceph_lock);
877
878 if (wake)
879 wake_up_all(&ci->i_cap_wq);
880
881 /* queue truncate if we saw i_size decrease */
882 if (queue_trunc)
883 ceph_queue_vmtruncate(inode);
884
885 /* populate frag tree */
886 if (S_ISDIR(inode->i_mode))
887 ceph_fill_fragtree(inode, &info->fragtree, dirinfo);
828 888
829 /* update delegation info? */ 889 /* update delegation info? */
830 if (dirinfo) 890 if (dirinfo)
831 ceph_fill_dirfrag(inode, dirinfo); 891 ceph_fill_dirfrag(inode, dirinfo);
832 892
833 err = 0; 893 err = 0;
834
835out: 894out:
895 if (new_cap)
896 ceph_put_cap(mdsc, new_cap);
836 if (xattr_blob) 897 if (xattr_blob)
837 ceph_buffer_put(xattr_blob); 898 ceph_buffer_put(xattr_blob);
838 return err; 899 return err;
@@ -1485,7 +1546,7 @@ static void ceph_invalidate_work(struct work_struct *work)
1485 orig_gen = ci->i_rdcache_gen; 1546 orig_gen = ci->i_rdcache_gen;
1486 spin_unlock(&ci->i_ceph_lock); 1547 spin_unlock(&ci->i_ceph_lock);
1487 1548
1488 truncate_inode_pages(inode->i_mapping, 0); 1549 truncate_pagecache(inode, 0);
1489 1550
1490 spin_lock(&ci->i_ceph_lock); 1551 spin_lock(&ci->i_ceph_lock);
1491 if (orig_gen == ci->i_rdcache_gen && 1552 if (orig_gen == ci->i_rdcache_gen &&
@@ -1588,7 +1649,7 @@ retry:
1588 ci->i_truncate_pending, to); 1649 ci->i_truncate_pending, to);
1589 spin_unlock(&ci->i_ceph_lock); 1650 spin_unlock(&ci->i_ceph_lock);
1590 1651
1591 truncate_inode_pages(inode->i_mapping, to); 1652 truncate_pagecache(inode, to);
1592 1653
1593 spin_lock(&ci->i_ceph_lock); 1654 spin_lock(&ci->i_ceph_lock);
1594 if (to == ci->i_truncate_size) { 1655 if (to == ci->i_truncate_size) {
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 9a33b98cb000..92a2548278fc 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1558,6 +1558,8 @@ ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode)
1558 init_completion(&req->r_safe_completion); 1558 init_completion(&req->r_safe_completion);
1559 INIT_LIST_HEAD(&req->r_unsafe_item); 1559 INIT_LIST_HEAD(&req->r_unsafe_item);
1560 1560
1561 req->r_stamp = CURRENT_TIME;
1562
1561 req->r_op = op; 1563 req->r_op = op;
1562 req->r_direct_mode = mode; 1564 req->r_direct_mode = mode;
1563 return req; 1565 return req;
@@ -1783,7 +1785,8 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
1783 } 1785 }
1784 1786
1785 len = sizeof(*head) + 1787 len = sizeof(*head) +
1786 pathlen1 + pathlen2 + 2*(1 + sizeof(u32) + sizeof(u64)); 1788 pathlen1 + pathlen2 + 2*(1 + sizeof(u32) + sizeof(u64)) +
1789 sizeof(struct timespec);
1787 1790
1788 /* calculate (max) length for cap releases */ 1791 /* calculate (max) length for cap releases */
1789 len += sizeof(struct ceph_mds_request_release) * 1792 len += sizeof(struct ceph_mds_request_release) *
@@ -1800,6 +1803,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
1800 goto out_free2; 1803 goto out_free2;
1801 } 1804 }
1802 1805
1806 msg->hdr.version = 2;
1803 msg->hdr.tid = cpu_to_le64(req->r_tid); 1807 msg->hdr.tid = cpu_to_le64(req->r_tid);
1804 1808
1805 head = msg->front.iov_base; 1809 head = msg->front.iov_base;
@@ -1836,6 +1840,9 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
1836 mds, req->r_old_inode_drop, req->r_old_inode_unless, 0); 1840 mds, req->r_old_inode_drop, req->r_old_inode_unless, 0);
1837 head->num_releases = cpu_to_le16(releases); 1841 head->num_releases = cpu_to_le16(releases);
1838 1842
1843 /* time stamp */
1844 ceph_encode_copy(&p, &req->r_stamp, sizeof(req->r_stamp));
1845
1839 BUG_ON(p > end); 1846 BUG_ON(p > end);
1840 msg->front.iov_len = p - msg->front.iov_base; 1847 msg->front.iov_len = p - msg->front.iov_base;
1841 msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); 1848 msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index e90cfccf93bd..e00737cf523c 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -194,6 +194,7 @@ struct ceph_mds_request {
194 int r_fmode; /* file mode, if expecting cap */ 194 int r_fmode; /* file mode, if expecting cap */
195 kuid_t r_uid; 195 kuid_t r_uid;
196 kgid_t r_gid; 196 kgid_t r_gid;
197 struct timespec r_stamp;
197 198
198 /* for choosing which mds to send this request to */ 199 /* for choosing which mds to send this request to */
199 int r_direct_mode; 200 int r_direct_mode;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index ead05cc1f447..12b20744e386 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -292,7 +292,6 @@ struct ceph_inode_info {
292 struct ceph_snap_context *i_head_snapc; /* set if wr_buffer_head > 0 or 292 struct ceph_snap_context *i_head_snapc; /* set if wr_buffer_head > 0 or
293 dirty|flushing caps */ 293 dirty|flushing caps */
294 unsigned i_snap_caps; /* cap bits for snapped files */ 294 unsigned i_snap_caps; /* cap bits for snapped files */
295 unsigned i_cap_exporting_issued;
296 295
297 int i_nr_by_mode[CEPH_FILE_MODE_NUM]; /* open file counts */ 296 int i_nr_by_mode[CEPH_FILE_MODE_NUM]; /* open file counts */
298 297
@@ -775,11 +774,13 @@ static inline void ceph_forget_all_cached_acls(struct inode *inode)
775extern const char *ceph_cap_string(int c); 774extern const char *ceph_cap_string(int c);
776extern void ceph_handle_caps(struct ceph_mds_session *session, 775extern void ceph_handle_caps(struct ceph_mds_session *session,
777 struct ceph_msg *msg); 776 struct ceph_msg *msg);
778extern int ceph_add_cap(struct inode *inode, 777extern struct ceph_cap *ceph_get_cap(struct ceph_mds_client *mdsc,
779 struct ceph_mds_session *session, u64 cap_id, 778 struct ceph_cap_reservation *ctx);
780 int fmode, unsigned issued, unsigned wanted, 779extern void ceph_add_cap(struct inode *inode,
781 unsigned cap, unsigned seq, u64 realmino, int flags, 780 struct ceph_mds_session *session, u64 cap_id,
782 struct ceph_cap_reservation *caps_reservation); 781 int fmode, unsigned issued, unsigned wanted,
782 unsigned cap, unsigned seq, u64 realmino, int flags,
783 struct ceph_cap **new_cap);
783extern void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release); 784extern void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release);
784extern void ceph_put_cap(struct ceph_mds_client *mdsc, 785extern void ceph_put_cap(struct ceph_mds_client *mdsc,
785 struct ceph_cap *cap); 786 struct ceph_cap *cap);