diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-06-13 02:06:23 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-06-13 02:06:23 -0400 |
commit | 6d87c225f5d82d29243dc124f1ffcbb0e14ec358 (patch) | |
tree | 7d72e2e6a77ec0911e86911d2ddae62c1b4161cf /fs/ceph | |
parent | 338c09a94b14c449dd53227e9bea44816668c6a5 (diff) | |
parent | 22001f619f29ddf66582d834223dcff4c0b74595 (diff) |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
Pull Ceph updates from Sage Weil:
"This has a mix of bug fixes and cleanups.
Alex's patch fixes a rare race in RBD. Ilya's patches fix an ENOENT
check when a second rbd image is mapped, and a couple of memory leaks.
Zheng fixes several issues with fragmented directories and multiple
MDSs. Josh fixes a spin/sleep issue, and Josh and Guangliang's
patches fix setting and unsetting RBD images read-only.
Naturally there are several other cleanups mixed in for good measure."
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client: (23 commits)
rbd: only set disk to read-only once
rbd: move calls that may sleep out of spin lock range
rbd: add ioctl for rbd
ceph: use truncate_pagecache() instead of truncate_inode_pages()
ceph: include time stamp in every MDS request
rbd: fix ida/idr memory leak
rbd: use reference counts for image requests
rbd: fix osd_request memory leak in __rbd_dev_header_watch_sync()
rbd: make sure we have latest osdmap on 'rbd map'
libceph: add ceph_monc_wait_osdmap()
libceph: mon_get_version request infrastructure
libceph: recognize poolop requests in debugfs
ceph: refactor readpage_nounlock() to make the logic clearer
mds: check cap ID when handling cap export message
ceph: remember subtree root dirfrag's auth MDS
ceph: introduce ceph_fill_fragtree()
ceph: handle cap import atomically
ceph: pre-allocate ceph_cap struct for ceph_add_cap()
ceph: update inode fields according to issued caps
rbd: replace IS_ERR and PTR_ERR with PTR_ERR_OR_ZERO
...
Diffstat (limited to 'fs/ceph')
-rw-r--r-- | fs/ceph/acl.c | 6 | ||||
-rw-r--r-- | fs/ceph/addr.c | 17 | ||||
-rw-r--r-- | fs/ceph/caps.c | 246 | ||||
-rw-r--r-- | fs/ceph/export.c | 2 | ||||
-rw-r--r-- | fs/ceph/inode.c | 247 | ||||
-rw-r--r-- | fs/ceph/mds_client.c | 9 | ||||
-rw-r--r-- | fs/ceph/mds_client.h | 1 | ||||
-rw-r--r-- | fs/ceph/super.h | 13 |
8 files changed, 310 insertions, 231 deletions
diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c index 21887d63dad5..469f2e8657e8 100644 --- a/fs/ceph/acl.c +++ b/fs/ceph/acl.c | |||
@@ -104,12 +104,6 @@ int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type) | |||
104 | umode_t new_mode = inode->i_mode, old_mode = inode->i_mode; | 104 | umode_t new_mode = inode->i_mode, old_mode = inode->i_mode; |
105 | struct dentry *dentry; | 105 | struct dentry *dentry; |
106 | 106 | ||
107 | if (acl) { | ||
108 | ret = posix_acl_valid(acl); | ||
109 | if (ret < 0) | ||
110 | goto out; | ||
111 | } | ||
112 | |||
113 | switch (type) { | 107 | switch (type) { |
114 | case ACL_TYPE_ACCESS: | 108 | case ACL_TYPE_ACCESS: |
115 | name = POSIX_ACL_XATTR_ACCESS; | 109 | name = POSIX_ACL_XATTR_ACCESS; |
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 4f3f69079f36..90b3954d48ed 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c | |||
@@ -211,18 +211,15 @@ static int readpage_nounlock(struct file *filp, struct page *page) | |||
211 | SetPageError(page); | 211 | SetPageError(page); |
212 | ceph_fscache_readpage_cancel(inode, page); | 212 | ceph_fscache_readpage_cancel(inode, page); |
213 | goto out; | 213 | goto out; |
214 | } else { | ||
215 | if (err < PAGE_CACHE_SIZE) { | ||
216 | /* zero fill remainder of page */ | ||
217 | zero_user_segment(page, err, PAGE_CACHE_SIZE); | ||
218 | } else { | ||
219 | flush_dcache_page(page); | ||
220 | } | ||
221 | } | 214 | } |
222 | SetPageUptodate(page); | 215 | if (err < PAGE_CACHE_SIZE) |
216 | /* zero fill remainder of page */ | ||
217 | zero_user_segment(page, err, PAGE_CACHE_SIZE); | ||
218 | else | ||
219 | flush_dcache_page(page); | ||
223 | 220 | ||
224 | if (err >= 0) | 221 | SetPageUptodate(page); |
225 | ceph_readpage_to_fscache(inode, page); | 222 | ceph_readpage_to_fscache(inode, page); |
226 | 223 | ||
227 | out: | 224 | out: |
228 | return err < 0 ? err : 0; | 225 | return err < 0 ? err : 0; |
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index c561b628ebce..1fde164b74b5 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
@@ -221,8 +221,8 @@ int ceph_unreserve_caps(struct ceph_mds_client *mdsc, | |||
221 | return 0; | 221 | return 0; |
222 | } | 222 | } |
223 | 223 | ||
224 | static struct ceph_cap *get_cap(struct ceph_mds_client *mdsc, | 224 | struct ceph_cap *ceph_get_cap(struct ceph_mds_client *mdsc, |
225 | struct ceph_cap_reservation *ctx) | 225 | struct ceph_cap_reservation *ctx) |
226 | { | 226 | { |
227 | struct ceph_cap *cap = NULL; | 227 | struct ceph_cap *cap = NULL; |
228 | 228 | ||
@@ -508,15 +508,14 @@ static void __check_cap_issue(struct ceph_inode_info *ci, struct ceph_cap *cap, | |||
508 | * it is < 0. (This is so we can atomically add the cap and add an | 508 | * it is < 0. (This is so we can atomically add the cap and add an |
509 | * open file reference to it.) | 509 | * open file reference to it.) |
510 | */ | 510 | */ |
511 | int ceph_add_cap(struct inode *inode, | 511 | void ceph_add_cap(struct inode *inode, |
512 | struct ceph_mds_session *session, u64 cap_id, | 512 | struct ceph_mds_session *session, u64 cap_id, |
513 | int fmode, unsigned issued, unsigned wanted, | 513 | int fmode, unsigned issued, unsigned wanted, |
514 | unsigned seq, unsigned mseq, u64 realmino, int flags, | 514 | unsigned seq, unsigned mseq, u64 realmino, int flags, |
515 | struct ceph_cap_reservation *caps_reservation) | 515 | struct ceph_cap **new_cap) |
516 | { | 516 | { |
517 | struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; | 517 | struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; |
518 | struct ceph_inode_info *ci = ceph_inode(inode); | 518 | struct ceph_inode_info *ci = ceph_inode(inode); |
519 | struct ceph_cap *new_cap = NULL; | ||
520 | struct ceph_cap *cap; | 519 | struct ceph_cap *cap; |
521 | int mds = session->s_mds; | 520 | int mds = session->s_mds; |
522 | int actual_wanted; | 521 | int actual_wanted; |
@@ -531,20 +530,10 @@ int ceph_add_cap(struct inode *inode, | |||
531 | if (fmode >= 0) | 530 | if (fmode >= 0) |
532 | wanted |= ceph_caps_for_mode(fmode); | 531 | wanted |= ceph_caps_for_mode(fmode); |
533 | 532 | ||
534 | retry: | ||
535 | spin_lock(&ci->i_ceph_lock); | ||
536 | cap = __get_cap_for_mds(ci, mds); | 533 | cap = __get_cap_for_mds(ci, mds); |
537 | if (!cap) { | 534 | if (!cap) { |
538 | if (new_cap) { | 535 | cap = *new_cap; |
539 | cap = new_cap; | 536 | *new_cap = NULL; |
540 | new_cap = NULL; | ||
541 | } else { | ||
542 | spin_unlock(&ci->i_ceph_lock); | ||
543 | new_cap = get_cap(mdsc, caps_reservation); | ||
544 | if (new_cap == NULL) | ||
545 | return -ENOMEM; | ||
546 | goto retry; | ||
547 | } | ||
548 | 537 | ||
549 | cap->issued = 0; | 538 | cap->issued = 0; |
550 | cap->implemented = 0; | 539 | cap->implemented = 0; |
@@ -562,9 +551,6 @@ retry: | |||
562 | session->s_nr_caps++; | 551 | session->s_nr_caps++; |
563 | spin_unlock(&session->s_cap_lock); | 552 | spin_unlock(&session->s_cap_lock); |
564 | } else { | 553 | } else { |
565 | if (new_cap) | ||
566 | ceph_put_cap(mdsc, new_cap); | ||
567 | |||
568 | /* | 554 | /* |
569 | * auth mds of the inode changed. we received the cap export | 555 | * auth mds of the inode changed. we received the cap export |
570 | * message, but still haven't received the cap import message. | 556 | * message, but still haven't received the cap import message. |
@@ -626,7 +612,6 @@ retry: | |||
626 | ci->i_auth_cap = cap; | 612 | ci->i_auth_cap = cap; |
627 | cap->mds_wanted = wanted; | 613 | cap->mds_wanted = wanted; |
628 | } | 614 | } |
629 | ci->i_cap_exporting_issued = 0; | ||
630 | } else { | 615 | } else { |
631 | WARN_ON(ci->i_auth_cap == cap); | 616 | WARN_ON(ci->i_auth_cap == cap); |
632 | } | 617 | } |
@@ -648,9 +633,6 @@ retry: | |||
648 | 633 | ||
649 | if (fmode >= 0) | 634 | if (fmode >= 0) |
650 | __ceph_get_fmode(ci, fmode); | 635 | __ceph_get_fmode(ci, fmode); |
651 | spin_unlock(&ci->i_ceph_lock); | ||
652 | wake_up_all(&ci->i_cap_wq); | ||
653 | return 0; | ||
654 | } | 636 | } |
655 | 637 | ||
656 | /* | 638 | /* |
@@ -685,7 +667,7 @@ static int __cap_is_valid(struct ceph_cap *cap) | |||
685 | */ | 667 | */ |
686 | int __ceph_caps_issued(struct ceph_inode_info *ci, int *implemented) | 668 | int __ceph_caps_issued(struct ceph_inode_info *ci, int *implemented) |
687 | { | 669 | { |
688 | int have = ci->i_snap_caps | ci->i_cap_exporting_issued; | 670 | int have = ci->i_snap_caps; |
689 | struct ceph_cap *cap; | 671 | struct ceph_cap *cap; |
690 | struct rb_node *p; | 672 | struct rb_node *p; |
691 | 673 | ||
@@ -900,7 +882,7 @@ int __ceph_caps_mds_wanted(struct ceph_inode_info *ci) | |||
900 | */ | 882 | */ |
901 | static int __ceph_is_any_caps(struct ceph_inode_info *ci) | 883 | static int __ceph_is_any_caps(struct ceph_inode_info *ci) |
902 | { | 884 | { |
903 | return !RB_EMPTY_ROOT(&ci->i_caps) || ci->i_cap_exporting_issued; | 885 | return !RB_EMPTY_ROOT(&ci->i_caps); |
904 | } | 886 | } |
905 | 887 | ||
906 | int ceph_is_any_caps(struct inode *inode) | 888 | int ceph_is_any_caps(struct inode *inode) |
@@ -2397,32 +2379,30 @@ static void invalidate_aliases(struct inode *inode) | |||
2397 | * actually be a revocation if it specifies a smaller cap set.) | 2379 | * actually be a revocation if it specifies a smaller cap set.) |
2398 | * | 2380 | * |
2399 | * caller holds s_mutex and i_ceph_lock, we drop both. | 2381 | * caller holds s_mutex and i_ceph_lock, we drop both. |
2400 | * | ||
2401 | * return value: | ||
2402 | * 0 - ok | ||
2403 | * 1 - check_caps on auth cap only (writeback) | ||
2404 | * 2 - check_caps (ack revoke) | ||
2405 | */ | 2382 | */ |
2406 | static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, | 2383 | static void handle_cap_grant(struct ceph_mds_client *mdsc, |
2384 | struct inode *inode, struct ceph_mds_caps *grant, | ||
2385 | void *snaptrace, int snaptrace_len, | ||
2386 | struct ceph_buffer *xattr_buf, | ||
2407 | struct ceph_mds_session *session, | 2387 | struct ceph_mds_session *session, |
2408 | struct ceph_cap *cap, | 2388 | struct ceph_cap *cap, int issued) |
2409 | struct ceph_buffer *xattr_buf) | 2389 | __releases(ci->i_ceph_lock) |
2410 | __releases(ci->i_ceph_lock) | ||
2411 | { | 2390 | { |
2412 | struct ceph_inode_info *ci = ceph_inode(inode); | 2391 | struct ceph_inode_info *ci = ceph_inode(inode); |
2413 | int mds = session->s_mds; | 2392 | int mds = session->s_mds; |
2414 | int seq = le32_to_cpu(grant->seq); | 2393 | int seq = le32_to_cpu(grant->seq); |
2415 | int newcaps = le32_to_cpu(grant->caps); | 2394 | int newcaps = le32_to_cpu(grant->caps); |
2416 | int issued, implemented, used, wanted, dirty; | 2395 | int used, wanted, dirty; |
2417 | u64 size = le64_to_cpu(grant->size); | 2396 | u64 size = le64_to_cpu(grant->size); |
2418 | u64 max_size = le64_to_cpu(grant->max_size); | 2397 | u64 max_size = le64_to_cpu(grant->max_size); |
2419 | struct timespec mtime, atime, ctime; | 2398 | struct timespec mtime, atime, ctime; |
2420 | int check_caps = 0; | 2399 | int check_caps = 0; |
2421 | int wake = 0; | 2400 | bool wake = 0; |
2422 | int writeback = 0; | 2401 | bool writeback = 0; |
2423 | int queue_invalidate = 0; | 2402 | bool queue_trunc = 0; |
2424 | int deleted_inode = 0; | 2403 | bool queue_invalidate = 0; |
2425 | int queue_revalidate = 0; | 2404 | bool queue_revalidate = 0; |
2405 | bool deleted_inode = 0; | ||
2426 | 2406 | ||
2427 | dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n", | 2407 | dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n", |
2428 | inode, cap, mds, seq, ceph_cap_string(newcaps)); | 2408 | inode, cap, mds, seq, ceph_cap_string(newcaps)); |
@@ -2466,16 +2446,13 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, | |||
2466 | } | 2446 | } |
2467 | 2447 | ||
2468 | /* side effects now are allowed */ | 2448 | /* side effects now are allowed */ |
2469 | |||
2470 | issued = __ceph_caps_issued(ci, &implemented); | ||
2471 | issued |= implemented | __ceph_caps_dirty(ci); | ||
2472 | |||
2473 | cap->cap_gen = session->s_cap_gen; | 2449 | cap->cap_gen = session->s_cap_gen; |
2474 | cap->seq = seq; | 2450 | cap->seq = seq; |
2475 | 2451 | ||
2476 | __check_cap_issue(ci, cap, newcaps); | 2452 | __check_cap_issue(ci, cap, newcaps); |
2477 | 2453 | ||
2478 | if ((issued & CEPH_CAP_AUTH_EXCL) == 0) { | 2454 | if ((newcaps & CEPH_CAP_AUTH_SHARED) && |
2455 | (issued & CEPH_CAP_AUTH_EXCL) == 0) { | ||
2479 | inode->i_mode = le32_to_cpu(grant->mode); | 2456 | inode->i_mode = le32_to_cpu(grant->mode); |
2480 | inode->i_uid = make_kuid(&init_user_ns, le32_to_cpu(grant->uid)); | 2457 | inode->i_uid = make_kuid(&init_user_ns, le32_to_cpu(grant->uid)); |
2481 | inode->i_gid = make_kgid(&init_user_ns, le32_to_cpu(grant->gid)); | 2458 | inode->i_gid = make_kgid(&init_user_ns, le32_to_cpu(grant->gid)); |
@@ -2484,7 +2461,8 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, | |||
2484 | from_kgid(&init_user_ns, inode->i_gid)); | 2461 | from_kgid(&init_user_ns, inode->i_gid)); |
2485 | } | 2462 | } |
2486 | 2463 | ||
2487 | if ((issued & CEPH_CAP_LINK_EXCL) == 0) { | 2464 | if ((newcaps & CEPH_CAP_AUTH_SHARED) && |
2465 | (issued & CEPH_CAP_LINK_EXCL) == 0) { | ||
2488 | set_nlink(inode, le32_to_cpu(grant->nlink)); | 2466 | set_nlink(inode, le32_to_cpu(grant->nlink)); |
2489 | if (inode->i_nlink == 0 && | 2467 | if (inode->i_nlink == 0 && |
2490 | (newcaps & (CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL))) | 2468 | (newcaps & (CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL))) |
@@ -2511,30 +2489,35 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, | |||
2511 | if ((issued & CEPH_CAP_FILE_CACHE) && ci->i_rdcache_gen > 1) | 2489 | if ((issued & CEPH_CAP_FILE_CACHE) && ci->i_rdcache_gen > 1) |
2512 | queue_revalidate = 1; | 2490 | queue_revalidate = 1; |
2513 | 2491 | ||
2514 | /* size/ctime/mtime/atime? */ | 2492 | if (newcaps & CEPH_CAP_ANY_RD) { |
2515 | ceph_fill_file_size(inode, issued, | 2493 | /* ctime/mtime/atime? */ |
2516 | le32_to_cpu(grant->truncate_seq), | 2494 | ceph_decode_timespec(&mtime, &grant->mtime); |
2517 | le64_to_cpu(grant->truncate_size), size); | 2495 | ceph_decode_timespec(&atime, &grant->atime); |
2518 | ceph_decode_timespec(&mtime, &grant->mtime); | 2496 | ceph_decode_timespec(&ctime, &grant->ctime); |
2519 | ceph_decode_timespec(&atime, &grant->atime); | 2497 | ceph_fill_file_time(inode, issued, |
2520 | ceph_decode_timespec(&ctime, &grant->ctime); | 2498 | le32_to_cpu(grant->time_warp_seq), |
2521 | ceph_fill_file_time(inode, issued, | 2499 | &ctime, &mtime, &atime); |
2522 | le32_to_cpu(grant->time_warp_seq), &ctime, &mtime, | 2500 | } |
2523 | &atime); | 2501 | |
2524 | 2502 | if (newcaps & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR)) { | |
2525 | 2503 | /* file layout may have changed */ | |
2526 | /* file layout may have changed */ | 2504 | ci->i_layout = grant->layout; |
2527 | ci->i_layout = grant->layout; | 2505 | /* size/truncate_seq? */ |
2528 | 2506 | queue_trunc = ceph_fill_file_size(inode, issued, | |
2529 | /* max size increase? */ | 2507 | le32_to_cpu(grant->truncate_seq), |
2530 | if (ci->i_auth_cap == cap && max_size != ci->i_max_size) { | 2508 | le64_to_cpu(grant->truncate_size), |
2531 | dout("max_size %lld -> %llu\n", ci->i_max_size, max_size); | 2509 | size); |
2532 | ci->i_max_size = max_size; | 2510 | /* max size increase? */ |
2533 | if (max_size >= ci->i_wanted_max_size) { | 2511 | if (ci->i_auth_cap == cap && max_size != ci->i_max_size) { |
2534 | ci->i_wanted_max_size = 0; /* reset */ | 2512 | dout("max_size %lld -> %llu\n", |
2535 | ci->i_requested_max_size = 0; | 2513 | ci->i_max_size, max_size); |
2514 | ci->i_max_size = max_size; | ||
2515 | if (max_size >= ci->i_wanted_max_size) { | ||
2516 | ci->i_wanted_max_size = 0; /* reset */ | ||
2517 | ci->i_requested_max_size = 0; | ||
2518 | } | ||
2519 | wake = 1; | ||
2536 | } | 2520 | } |
2537 | wake = 1; | ||
2538 | } | 2521 | } |
2539 | 2522 | ||
2540 | /* check cap bits */ | 2523 | /* check cap bits */ |
@@ -2595,6 +2578,23 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, | |||
2595 | 2578 | ||
2596 | spin_unlock(&ci->i_ceph_lock); | 2579 | spin_unlock(&ci->i_ceph_lock); |
2597 | 2580 | ||
2581 | if (le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT) { | ||
2582 | down_write(&mdsc->snap_rwsem); | ||
2583 | ceph_update_snap_trace(mdsc, snaptrace, | ||
2584 | snaptrace + snaptrace_len, false); | ||
2585 | downgrade_write(&mdsc->snap_rwsem); | ||
2586 | kick_flushing_inode_caps(mdsc, session, inode); | ||
2587 | up_read(&mdsc->snap_rwsem); | ||
2588 | if (newcaps & ~issued) | ||
2589 | wake = 1; | ||
2590 | } | ||
2591 | |||
2592 | if (queue_trunc) { | ||
2593 | ceph_queue_vmtruncate(inode); | ||
2594 | ceph_queue_revalidate(inode); | ||
2595 | } else if (queue_revalidate) | ||
2596 | ceph_queue_revalidate(inode); | ||
2597 | |||
2598 | if (writeback) | 2598 | if (writeback) |
2599 | /* | 2599 | /* |
2600 | * queue inode for writeback: we can't actually call | 2600 | * queue inode for writeback: we can't actually call |
@@ -2606,8 +2606,6 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, | |||
2606 | ceph_queue_invalidate(inode); | 2606 | ceph_queue_invalidate(inode); |
2607 | if (deleted_inode) | 2607 | if (deleted_inode) |
2608 | invalidate_aliases(inode); | 2608 | invalidate_aliases(inode); |
2609 | if (queue_revalidate) | ||
2610 | ceph_queue_revalidate(inode); | ||
2611 | if (wake) | 2609 | if (wake) |
2612 | wake_up_all(&ci->i_cap_wq); | 2610 | wake_up_all(&ci->i_cap_wq); |
2613 | 2611 | ||
@@ -2784,7 +2782,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex, | |||
2784 | { | 2782 | { |
2785 | struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; | 2783 | struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; |
2786 | struct ceph_mds_session *tsession = NULL; | 2784 | struct ceph_mds_session *tsession = NULL; |
2787 | struct ceph_cap *cap, *tcap; | 2785 | struct ceph_cap *cap, *tcap, *new_cap = NULL; |
2788 | struct ceph_inode_info *ci = ceph_inode(inode); | 2786 | struct ceph_inode_info *ci = ceph_inode(inode); |
2789 | u64 t_cap_id; | 2787 | u64 t_cap_id; |
2790 | unsigned mseq = le32_to_cpu(ex->migrate_seq); | 2788 | unsigned mseq = le32_to_cpu(ex->migrate_seq); |
@@ -2807,7 +2805,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex, | |||
2807 | retry: | 2805 | retry: |
2808 | spin_lock(&ci->i_ceph_lock); | 2806 | spin_lock(&ci->i_ceph_lock); |
2809 | cap = __get_cap_for_mds(ci, mds); | 2807 | cap = __get_cap_for_mds(ci, mds); |
2810 | if (!cap) | 2808 | if (!cap || cap->cap_id != le64_to_cpu(ex->cap_id)) |
2811 | goto out_unlock; | 2809 | goto out_unlock; |
2812 | 2810 | ||
2813 | if (target < 0) { | 2811 | if (target < 0) { |
@@ -2846,15 +2844,14 @@ retry: | |||
2846 | } | 2844 | } |
2847 | __ceph_remove_cap(cap, false); | 2845 | __ceph_remove_cap(cap, false); |
2848 | goto out_unlock; | 2846 | goto out_unlock; |
2849 | } | 2847 | } else if (tsession) { |
2850 | |||
2851 | if (tsession) { | ||
2852 | int flag = (cap == ci->i_auth_cap) ? CEPH_CAP_FLAG_AUTH : 0; | ||
2853 | spin_unlock(&ci->i_ceph_lock); | ||
2854 | /* add placeholder for the export tagert */ | 2848 | /* add placeholder for the export tagert */ |
2849 | int flag = (cap == ci->i_auth_cap) ? CEPH_CAP_FLAG_AUTH : 0; | ||
2855 | ceph_add_cap(inode, tsession, t_cap_id, -1, issued, 0, | 2850 | ceph_add_cap(inode, tsession, t_cap_id, -1, issued, 0, |
2856 | t_seq - 1, t_mseq, (u64)-1, flag, NULL); | 2851 | t_seq - 1, t_mseq, (u64)-1, flag, &new_cap); |
2857 | goto retry; | 2852 | |
2853 | __ceph_remove_cap(cap, false); | ||
2854 | goto out_unlock; | ||
2858 | } | 2855 | } |
2859 | 2856 | ||
2860 | spin_unlock(&ci->i_ceph_lock); | 2857 | spin_unlock(&ci->i_ceph_lock); |
@@ -2873,6 +2870,7 @@ retry: | |||
2873 | SINGLE_DEPTH_NESTING); | 2870 | SINGLE_DEPTH_NESTING); |
2874 | } | 2871 | } |
2875 | ceph_add_cap_releases(mdsc, tsession); | 2872 | ceph_add_cap_releases(mdsc, tsession); |
2873 | new_cap = ceph_get_cap(mdsc, NULL); | ||
2876 | } else { | 2874 | } else { |
2877 | WARN_ON(1); | 2875 | WARN_ON(1); |
2878 | tsession = NULL; | 2876 | tsession = NULL; |
@@ -2887,24 +2885,27 @@ out_unlock: | |||
2887 | mutex_unlock(&tsession->s_mutex); | 2885 | mutex_unlock(&tsession->s_mutex); |
2888 | ceph_put_mds_session(tsession); | 2886 | ceph_put_mds_session(tsession); |
2889 | } | 2887 | } |
2888 | if (new_cap) | ||
2889 | ceph_put_cap(mdsc, new_cap); | ||
2890 | } | 2890 | } |
2891 | 2891 | ||
2892 | /* | 2892 | /* |
2893 | * Handle cap IMPORT. If there are temp bits from an older EXPORT, | 2893 | * Handle cap IMPORT. |
2894 | * clean them up. | ||
2895 | * | 2894 | * |
2896 | * caller holds s_mutex. | 2895 | * caller holds s_mutex. acquires i_ceph_lock |
2897 | */ | 2896 | */ |
2898 | static void handle_cap_import(struct ceph_mds_client *mdsc, | 2897 | static void handle_cap_import(struct ceph_mds_client *mdsc, |
2899 | struct inode *inode, struct ceph_mds_caps *im, | 2898 | struct inode *inode, struct ceph_mds_caps *im, |
2900 | struct ceph_mds_cap_peer *ph, | 2899 | struct ceph_mds_cap_peer *ph, |
2901 | struct ceph_mds_session *session, | 2900 | struct ceph_mds_session *session, |
2902 | void *snaptrace, int snaptrace_len) | 2901 | struct ceph_cap **target_cap, int *old_issued) |
2902 | __acquires(ci->i_ceph_lock) | ||
2903 | { | 2903 | { |
2904 | struct ceph_inode_info *ci = ceph_inode(inode); | 2904 | struct ceph_inode_info *ci = ceph_inode(inode); |
2905 | struct ceph_cap *cap; | 2905 | struct ceph_cap *cap, *ocap, *new_cap = NULL; |
2906 | int mds = session->s_mds; | 2906 | int mds = session->s_mds; |
2907 | unsigned issued = le32_to_cpu(im->caps); | 2907 | int issued; |
2908 | unsigned caps = le32_to_cpu(im->caps); | ||
2908 | unsigned wanted = le32_to_cpu(im->wanted); | 2909 | unsigned wanted = le32_to_cpu(im->wanted); |
2909 | unsigned seq = le32_to_cpu(im->seq); | 2910 | unsigned seq = le32_to_cpu(im->seq); |
2910 | unsigned mseq = le32_to_cpu(im->migrate_seq); | 2911 | unsigned mseq = le32_to_cpu(im->migrate_seq); |
@@ -2924,40 +2925,52 @@ static void handle_cap_import(struct ceph_mds_client *mdsc, | |||
2924 | dout("handle_cap_import inode %p ci %p mds%d mseq %d peer %d\n", | 2925 | dout("handle_cap_import inode %p ci %p mds%d mseq %d peer %d\n", |
2925 | inode, ci, mds, mseq, peer); | 2926 | inode, ci, mds, mseq, peer); |
2926 | 2927 | ||
2928 | retry: | ||
2927 | spin_lock(&ci->i_ceph_lock); | 2929 | spin_lock(&ci->i_ceph_lock); |
2928 | cap = peer >= 0 ? __get_cap_for_mds(ci, peer) : NULL; | 2930 | cap = __get_cap_for_mds(ci, mds); |
2929 | if (cap && cap->cap_id == p_cap_id) { | 2931 | if (!cap) { |
2932 | if (!new_cap) { | ||
2933 | spin_unlock(&ci->i_ceph_lock); | ||
2934 | new_cap = ceph_get_cap(mdsc, NULL); | ||
2935 | goto retry; | ||
2936 | } | ||
2937 | cap = new_cap; | ||
2938 | } else { | ||
2939 | if (new_cap) { | ||
2940 | ceph_put_cap(mdsc, new_cap); | ||
2941 | new_cap = NULL; | ||
2942 | } | ||
2943 | } | ||
2944 | |||
2945 | __ceph_caps_issued(ci, &issued); | ||
2946 | issued |= __ceph_caps_dirty(ci); | ||
2947 | |||
2948 | ceph_add_cap(inode, session, cap_id, -1, caps, wanted, seq, mseq, | ||
2949 | realmino, CEPH_CAP_FLAG_AUTH, &new_cap); | ||
2950 | |||
2951 | ocap = peer >= 0 ? __get_cap_for_mds(ci, peer) : NULL; | ||
2952 | if (ocap && ocap->cap_id == p_cap_id) { | ||
2930 | dout(" remove export cap %p mds%d flags %d\n", | 2953 | dout(" remove export cap %p mds%d flags %d\n", |
2931 | cap, peer, ph->flags); | 2954 | ocap, peer, ph->flags); |
2932 | if ((ph->flags & CEPH_CAP_FLAG_AUTH) && | 2955 | if ((ph->flags & CEPH_CAP_FLAG_AUTH) && |
2933 | (cap->seq != le32_to_cpu(ph->seq) || | 2956 | (ocap->seq != le32_to_cpu(ph->seq) || |
2934 | cap->mseq != le32_to_cpu(ph->mseq))) { | 2957 | ocap->mseq != le32_to_cpu(ph->mseq))) { |
2935 | pr_err("handle_cap_import: mismatched seq/mseq: " | 2958 | pr_err("handle_cap_import: mismatched seq/mseq: " |
2936 | "ino (%llx.%llx) mds%d seq %d mseq %d " | 2959 | "ino (%llx.%llx) mds%d seq %d mseq %d " |
2937 | "importer mds%d has peer seq %d mseq %d\n", | 2960 | "importer mds%d has peer seq %d mseq %d\n", |
2938 | ceph_vinop(inode), peer, cap->seq, | 2961 | ceph_vinop(inode), peer, ocap->seq, |
2939 | cap->mseq, mds, le32_to_cpu(ph->seq), | 2962 | ocap->mseq, mds, le32_to_cpu(ph->seq), |
2940 | le32_to_cpu(ph->mseq)); | 2963 | le32_to_cpu(ph->mseq)); |
2941 | } | 2964 | } |
2942 | ci->i_cap_exporting_issued = cap->issued; | 2965 | __ceph_remove_cap(ocap, (ph->flags & CEPH_CAP_FLAG_RELEASE)); |
2943 | __ceph_remove_cap(cap, (ph->flags & CEPH_CAP_FLAG_RELEASE)); | ||
2944 | } | 2966 | } |
2945 | 2967 | ||
2946 | /* make sure we re-request max_size, if necessary */ | 2968 | /* make sure we re-request max_size, if necessary */ |
2947 | ci->i_wanted_max_size = 0; | 2969 | ci->i_wanted_max_size = 0; |
2948 | ci->i_requested_max_size = 0; | 2970 | ci->i_requested_max_size = 0; |
2949 | spin_unlock(&ci->i_ceph_lock); | ||
2950 | |||
2951 | down_write(&mdsc->snap_rwsem); | ||
2952 | ceph_update_snap_trace(mdsc, snaptrace, snaptrace+snaptrace_len, | ||
2953 | false); | ||
2954 | downgrade_write(&mdsc->snap_rwsem); | ||
2955 | ceph_add_cap(inode, session, cap_id, -1, | ||
2956 | issued, wanted, seq, mseq, realmino, CEPH_CAP_FLAG_AUTH, | ||
2957 | NULL /* no caps context */); | ||
2958 | kick_flushing_inode_caps(mdsc, session, inode); | ||
2959 | up_read(&mdsc->snap_rwsem); | ||
2960 | 2971 | ||
2972 | *old_issued = issued; | ||
2973 | *target_cap = cap; | ||
2961 | } | 2974 | } |
2962 | 2975 | ||
2963 | /* | 2976 | /* |
@@ -2977,7 +2990,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
2977 | struct ceph_mds_caps *h; | 2990 | struct ceph_mds_caps *h; |
2978 | struct ceph_mds_cap_peer *peer = NULL; | 2991 | struct ceph_mds_cap_peer *peer = NULL; |
2979 | int mds = session->s_mds; | 2992 | int mds = session->s_mds; |
2980 | int op; | 2993 | int op, issued; |
2981 | u32 seq, mseq; | 2994 | u32 seq, mseq; |
2982 | struct ceph_vino vino; | 2995 | struct ceph_vino vino; |
2983 | u64 cap_id; | 2996 | u64 cap_id; |
@@ -3069,7 +3082,10 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
3069 | 3082 | ||
3070 | case CEPH_CAP_OP_IMPORT: | 3083 | case CEPH_CAP_OP_IMPORT: |
3071 | handle_cap_import(mdsc, inode, h, peer, session, | 3084 | handle_cap_import(mdsc, inode, h, peer, session, |
3072 | snaptrace, snaptrace_len); | 3085 | &cap, &issued); |
3086 | handle_cap_grant(mdsc, inode, h, snaptrace, snaptrace_len, | ||
3087 | msg->middle, session, cap, issued); | ||
3088 | goto done_unlocked; | ||
3073 | } | 3089 | } |
3074 | 3090 | ||
3075 | /* the rest require a cap */ | 3091 | /* the rest require a cap */ |
@@ -3086,8 +3102,10 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
3086 | switch (op) { | 3102 | switch (op) { |
3087 | case CEPH_CAP_OP_REVOKE: | 3103 | case CEPH_CAP_OP_REVOKE: |
3088 | case CEPH_CAP_OP_GRANT: | 3104 | case CEPH_CAP_OP_GRANT: |
3089 | case CEPH_CAP_OP_IMPORT: | 3105 | __ceph_caps_issued(ci, &issued); |
3090 | handle_cap_grant(inode, h, session, cap, msg->middle); | 3106 | issued |= __ceph_caps_dirty(ci); |
3107 | handle_cap_grant(mdsc, inode, h, NULL, 0, msg->middle, | ||
3108 | session, cap, issued); | ||
3091 | goto done_unlocked; | 3109 | goto done_unlocked; |
3092 | 3110 | ||
3093 | case CEPH_CAP_OP_FLUSH_ACK: | 3111 | case CEPH_CAP_OP_FLUSH_ACK: |
diff --git a/fs/ceph/export.c b/fs/ceph/export.c index 00d6af6a32ec..8d7d782f4382 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c | |||
@@ -169,7 +169,7 @@ static struct dentry *__get_parent(struct super_block *sb, | |||
169 | return dentry; | 169 | return dentry; |
170 | } | 170 | } |
171 | 171 | ||
172 | struct dentry *ceph_get_parent(struct dentry *child) | 172 | static struct dentry *ceph_get_parent(struct dentry *child) |
173 | { | 173 | { |
174 | /* don't re-export snaps */ | 174 | /* don't re-export snaps */ |
175 | if (ceph_snap(child->d_inode) != CEPH_NOSNAP) | 175 | if (ceph_snap(child->d_inode) != CEPH_NOSNAP) |
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index e4fff9ff1c27..04c89c266cec 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c | |||
@@ -10,6 +10,7 @@ | |||
10 | #include <linux/writeback.h> | 10 | #include <linux/writeback.h> |
11 | #include <linux/vmalloc.h> | 11 | #include <linux/vmalloc.h> |
12 | #include <linux/posix_acl.h> | 12 | #include <linux/posix_acl.h> |
13 | #include <linux/random.h> | ||
13 | 14 | ||
14 | #include "super.h" | 15 | #include "super.h" |
15 | #include "mds_client.h" | 16 | #include "mds_client.h" |
@@ -179,9 +180,8 @@ struct ceph_inode_frag *__ceph_find_frag(struct ceph_inode_info *ci, u32 f) | |||
179 | * specified, copy the frag delegation info to the caller if | 180 | * specified, copy the frag delegation info to the caller if |
180 | * it is present. | 181 | * it is present. |
181 | */ | 182 | */ |
182 | u32 ceph_choose_frag(struct ceph_inode_info *ci, u32 v, | 183 | static u32 __ceph_choose_frag(struct ceph_inode_info *ci, u32 v, |
183 | struct ceph_inode_frag *pfrag, | 184 | struct ceph_inode_frag *pfrag, int *found) |
184 | int *found) | ||
185 | { | 185 | { |
186 | u32 t = ceph_frag_make(0, 0); | 186 | u32 t = ceph_frag_make(0, 0); |
187 | struct ceph_inode_frag *frag; | 187 | struct ceph_inode_frag *frag; |
@@ -191,7 +191,6 @@ u32 ceph_choose_frag(struct ceph_inode_info *ci, u32 v, | |||
191 | if (found) | 191 | if (found) |
192 | *found = 0; | 192 | *found = 0; |
193 | 193 | ||
194 | mutex_lock(&ci->i_fragtree_mutex); | ||
195 | while (1) { | 194 | while (1) { |
196 | WARN_ON(!ceph_frag_contains_value(t, v)); | 195 | WARN_ON(!ceph_frag_contains_value(t, v)); |
197 | frag = __ceph_find_frag(ci, t); | 196 | frag = __ceph_find_frag(ci, t); |
@@ -220,10 +219,19 @@ u32 ceph_choose_frag(struct ceph_inode_info *ci, u32 v, | |||
220 | } | 219 | } |
221 | dout("choose_frag(%x) = %x\n", v, t); | 220 | dout("choose_frag(%x) = %x\n", v, t); |
222 | 221 | ||
223 | mutex_unlock(&ci->i_fragtree_mutex); | ||
224 | return t; | 222 | return t; |
225 | } | 223 | } |
226 | 224 | ||
225 | u32 ceph_choose_frag(struct ceph_inode_info *ci, u32 v, | ||
226 | struct ceph_inode_frag *pfrag, int *found) | ||
227 | { | ||
228 | u32 ret; | ||
229 | mutex_lock(&ci->i_fragtree_mutex); | ||
230 | ret = __ceph_choose_frag(ci, v, pfrag, found); | ||
231 | mutex_unlock(&ci->i_fragtree_mutex); | ||
232 | return ret; | ||
233 | } | ||
234 | |||
227 | /* | 235 | /* |
228 | * Process dirfrag (delegation) info from the mds. Include leaf | 236 | * Process dirfrag (delegation) info from the mds. Include leaf |
229 | * fragment in tree ONLY if ndist > 0. Otherwise, only | 237 | * fragment in tree ONLY if ndist > 0. Otherwise, only |
@@ -237,11 +245,17 @@ static int ceph_fill_dirfrag(struct inode *inode, | |||
237 | u32 id = le32_to_cpu(dirinfo->frag); | 245 | u32 id = le32_to_cpu(dirinfo->frag); |
238 | int mds = le32_to_cpu(dirinfo->auth); | 246 | int mds = le32_to_cpu(dirinfo->auth); |
239 | int ndist = le32_to_cpu(dirinfo->ndist); | 247 | int ndist = le32_to_cpu(dirinfo->ndist); |
248 | int diri_auth = -1; | ||
240 | int i; | 249 | int i; |
241 | int err = 0; | 250 | int err = 0; |
242 | 251 | ||
252 | spin_lock(&ci->i_ceph_lock); | ||
253 | if (ci->i_auth_cap) | ||
254 | diri_auth = ci->i_auth_cap->mds; | ||
255 | spin_unlock(&ci->i_ceph_lock); | ||
256 | |||
243 | mutex_lock(&ci->i_fragtree_mutex); | 257 | mutex_lock(&ci->i_fragtree_mutex); |
244 | if (ndist == 0) { | 258 | if (ndist == 0 && mds == diri_auth) { |
245 | /* no delegation info needed. */ | 259 | /* no delegation info needed. */ |
246 | frag = __ceph_find_frag(ci, id); | 260 | frag = __ceph_find_frag(ci, id); |
247 | if (!frag) | 261 | if (!frag) |
@@ -286,6 +300,75 @@ out: | |||
286 | return err; | 300 | return err; |
287 | } | 301 | } |
288 | 302 | ||
303 | static int ceph_fill_fragtree(struct inode *inode, | ||
304 | struct ceph_frag_tree_head *fragtree, | ||
305 | struct ceph_mds_reply_dirfrag *dirinfo) | ||
306 | { | ||
307 | struct ceph_inode_info *ci = ceph_inode(inode); | ||
308 | struct ceph_inode_frag *frag; | ||
309 | struct rb_node *rb_node; | ||
310 | int i; | ||
311 | u32 id, nsplits; | ||
312 | bool update = false; | ||
313 | |||
314 | mutex_lock(&ci->i_fragtree_mutex); | ||
315 | nsplits = le32_to_cpu(fragtree->nsplits); | ||
316 | if (nsplits) { | ||
317 | i = prandom_u32() % nsplits; | ||
318 | id = le32_to_cpu(fragtree->splits[i].frag); | ||
319 | if (!__ceph_find_frag(ci, id)) | ||
320 | update = true; | ||
321 | } else if (!RB_EMPTY_ROOT(&ci->i_fragtree)) { | ||
322 | rb_node = rb_first(&ci->i_fragtree); | ||
323 | frag = rb_entry(rb_node, struct ceph_inode_frag, node); | ||
324 | if (frag->frag != ceph_frag_make(0, 0) || rb_next(rb_node)) | ||
325 | update = true; | ||
326 | } | ||
327 | if (!update && dirinfo) { | ||
328 | id = le32_to_cpu(dirinfo->frag); | ||
329 | if (id != __ceph_choose_frag(ci, id, NULL, NULL)) | ||
330 | update = true; | ||
331 | } | ||
332 | if (!update) | ||
333 | goto out_unlock; | ||
334 | |||
335 | dout("fill_fragtree %llx.%llx\n", ceph_vinop(inode)); | ||
336 | rb_node = rb_first(&ci->i_fragtree); | ||
337 | for (i = 0; i < nsplits; i++) { | ||
338 | id = le32_to_cpu(fragtree->splits[i].frag); | ||
339 | frag = NULL; | ||
340 | while (rb_node) { | ||
341 | frag = rb_entry(rb_node, struct ceph_inode_frag, node); | ||
342 | if (ceph_frag_compare(frag->frag, id) >= 0) { | ||
343 | if (frag->frag != id) | ||
344 | frag = NULL; | ||
345 | else | ||
346 | rb_node = rb_next(rb_node); | ||
347 | break; | ||
348 | } | ||
349 | rb_node = rb_next(rb_node); | ||
350 | rb_erase(&frag->node, &ci->i_fragtree); | ||
351 | kfree(frag); | ||
352 | frag = NULL; | ||
353 | } | ||
354 | if (!frag) { | ||
355 | frag = __get_or_create_frag(ci, id); | ||
356 | if (IS_ERR(frag)) | ||
357 | continue; | ||
358 | } | ||
359 | frag->split_by = le32_to_cpu(fragtree->splits[i].by); | ||
360 | dout(" frag %x split by %d\n", frag->frag, frag->split_by); | ||
361 | } | ||
362 | while (rb_node) { | ||
363 | frag = rb_entry(rb_node, struct ceph_inode_frag, node); | ||
364 | rb_node = rb_next(rb_node); | ||
365 | rb_erase(&frag->node, &ci->i_fragtree); | ||
366 | kfree(frag); | ||
367 | } | ||
368 | out_unlock: | ||
369 | mutex_unlock(&ci->i_fragtree_mutex); | ||
370 | return 0; | ||
371 | } | ||
289 | 372 | ||
290 | /* | 373 | /* |
291 | * initialize a newly allocated inode. | 374 | * initialize a newly allocated inode. |
@@ -341,7 +424,6 @@ struct inode *ceph_alloc_inode(struct super_block *sb) | |||
341 | INIT_LIST_HEAD(&ci->i_cap_snaps); | 424 | INIT_LIST_HEAD(&ci->i_cap_snaps); |
342 | ci->i_head_snapc = NULL; | 425 | ci->i_head_snapc = NULL; |
343 | ci->i_snap_caps = 0; | 426 | ci->i_snap_caps = 0; |
344 | ci->i_cap_exporting_issued = 0; | ||
345 | 427 | ||
346 | for (i = 0; i < CEPH_FILE_MODE_NUM; i++) | 428 | for (i = 0; i < CEPH_FILE_MODE_NUM; i++) |
347 | ci->i_nr_by_mode[i] = 0; | 429 | ci->i_nr_by_mode[i] = 0; |
@@ -407,7 +489,7 @@ void ceph_destroy_inode(struct inode *inode) | |||
407 | 489 | ||
408 | /* | 490 | /* |
409 | * we may still have a snap_realm reference if there are stray | 491 | * we may still have a snap_realm reference if there are stray |
410 | * caps in i_cap_exporting_issued or i_snap_caps. | 492 | * caps in i_snap_caps. |
411 | */ | 493 | */ |
412 | if (ci->i_snap_realm) { | 494 | if (ci->i_snap_realm) { |
413 | struct ceph_mds_client *mdsc = | 495 | struct ceph_mds_client *mdsc = |
@@ -582,22 +664,26 @@ static int fill_inode(struct inode *inode, | |||
582 | unsigned long ttl_from, int cap_fmode, | 664 | unsigned long ttl_from, int cap_fmode, |
583 | struct ceph_cap_reservation *caps_reservation) | 665 | struct ceph_cap_reservation *caps_reservation) |
584 | { | 666 | { |
667 | struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; | ||
585 | struct ceph_mds_reply_inode *info = iinfo->in; | 668 | struct ceph_mds_reply_inode *info = iinfo->in; |
586 | struct ceph_inode_info *ci = ceph_inode(inode); | 669 | struct ceph_inode_info *ci = ceph_inode(inode); |
587 | int i; | 670 | int issued = 0, implemented, new_issued; |
588 | int issued = 0, implemented; | ||
589 | struct timespec mtime, atime, ctime; | 671 | struct timespec mtime, atime, ctime; |
590 | u32 nsplits; | ||
591 | struct ceph_inode_frag *frag; | ||
592 | struct rb_node *rb_node; | ||
593 | struct ceph_buffer *xattr_blob = NULL; | 672 | struct ceph_buffer *xattr_blob = NULL; |
673 | struct ceph_cap *new_cap = NULL; | ||
594 | int err = 0; | 674 | int err = 0; |
595 | int queue_trunc = 0; | 675 | bool wake = false; |
676 | bool queue_trunc = false; | ||
677 | bool new_version = false; | ||
596 | 678 | ||
597 | dout("fill_inode %p ino %llx.%llx v %llu had %llu\n", | 679 | dout("fill_inode %p ino %llx.%llx v %llu had %llu\n", |
598 | inode, ceph_vinop(inode), le64_to_cpu(info->version), | 680 | inode, ceph_vinop(inode), le64_to_cpu(info->version), |
599 | ci->i_version); | 681 | ci->i_version); |
600 | 682 | ||
683 | /* prealloc new cap struct */ | ||
684 | if (info->cap.caps && ceph_snap(inode) == CEPH_NOSNAP) | ||
685 | new_cap = ceph_get_cap(mdsc, caps_reservation); | ||
686 | |||
601 | /* | 687 | /* |
602 | * prealloc xattr data, if it looks like we'll need it. only | 688 | * prealloc xattr data, if it looks like we'll need it. only |
603 | * if len > 4 (meaning there are actually xattrs; the first 4 | 689 | * if len > 4 (meaning there are actually xattrs; the first 4 |
@@ -623,19 +709,23 @@ static int fill_inode(struct inode *inode, | |||
623 | * 3 2 skip | 709 | * 3 2 skip |
624 | * 3 3 update | 710 | * 3 3 update |
625 | */ | 711 | */ |
626 | if (le64_to_cpu(info->version) > 0 && | 712 | if (ci->i_version == 0 || |
627 | (ci->i_version & ~1) >= le64_to_cpu(info->version)) | 713 | ((info->cap.flags & CEPH_CAP_FLAG_AUTH) && |
628 | goto no_change; | 714 | le64_to_cpu(info->version) > (ci->i_version & ~1))) |
629 | 715 | new_version = true; | |
716 | |||
630 | issued = __ceph_caps_issued(ci, &implemented); | 717 | issued = __ceph_caps_issued(ci, &implemented); |
631 | issued |= implemented | __ceph_caps_dirty(ci); | 718 | issued |= implemented | __ceph_caps_dirty(ci); |
719 | new_issued = ~issued & le32_to_cpu(info->cap.caps); | ||
632 | 720 | ||
633 | /* update inode */ | 721 | /* update inode */ |
634 | ci->i_version = le64_to_cpu(info->version); | 722 | ci->i_version = le64_to_cpu(info->version); |
635 | inode->i_version++; | 723 | inode->i_version++; |
636 | inode->i_rdev = le32_to_cpu(info->rdev); | 724 | inode->i_rdev = le32_to_cpu(info->rdev); |
725 | inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1; | ||
637 | 726 | ||
638 | if ((issued & CEPH_CAP_AUTH_EXCL) == 0) { | 727 | if ((new_version || (new_issued & CEPH_CAP_AUTH_SHARED)) && |
728 | (issued & CEPH_CAP_AUTH_EXCL) == 0) { | ||
639 | inode->i_mode = le32_to_cpu(info->mode); | 729 | inode->i_mode = le32_to_cpu(info->mode); |
640 | inode->i_uid = make_kuid(&init_user_ns, le32_to_cpu(info->uid)); | 730 | inode->i_uid = make_kuid(&init_user_ns, le32_to_cpu(info->uid)); |
641 | inode->i_gid = make_kgid(&init_user_ns, le32_to_cpu(info->gid)); | 731 | inode->i_gid = make_kgid(&init_user_ns, le32_to_cpu(info->gid)); |
@@ -644,23 +734,35 @@ static int fill_inode(struct inode *inode, | |||
644 | from_kgid(&init_user_ns, inode->i_gid)); | 734 | from_kgid(&init_user_ns, inode->i_gid)); |
645 | } | 735 | } |
646 | 736 | ||
647 | if ((issued & CEPH_CAP_LINK_EXCL) == 0) | 737 | if ((new_version || (new_issued & CEPH_CAP_LINK_SHARED)) && |
738 | (issued & CEPH_CAP_LINK_EXCL) == 0) | ||
648 | set_nlink(inode, le32_to_cpu(info->nlink)); | 739 | set_nlink(inode, le32_to_cpu(info->nlink)); |
649 | 740 | ||
650 | /* be careful with mtime, atime, size */ | 741 | if (new_version || (new_issued & CEPH_CAP_ANY_RD)) { |
651 | ceph_decode_timespec(&atime, &info->atime); | 742 | /* be careful with mtime, atime, size */ |
652 | ceph_decode_timespec(&mtime, &info->mtime); | 743 | ceph_decode_timespec(&atime, &info->atime); |
653 | ceph_decode_timespec(&ctime, &info->ctime); | 744 | ceph_decode_timespec(&mtime, &info->mtime); |
654 | queue_trunc = ceph_fill_file_size(inode, issued, | 745 | ceph_decode_timespec(&ctime, &info->ctime); |
655 | le32_to_cpu(info->truncate_seq), | 746 | ceph_fill_file_time(inode, issued, |
656 | le64_to_cpu(info->truncate_size), | 747 | le32_to_cpu(info->time_warp_seq), |
657 | le64_to_cpu(info->size)); | 748 | &ctime, &mtime, &atime); |
658 | ceph_fill_file_time(inode, issued, | 749 | } |
659 | le32_to_cpu(info->time_warp_seq), | 750 | |
660 | &ctime, &mtime, &atime); | 751 | if (new_version || |
661 | 752 | (new_issued & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR))) { | |
662 | ci->i_layout = info->layout; | 753 | ci->i_layout = info->layout; |
663 | inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1; | 754 | queue_trunc = ceph_fill_file_size(inode, issued, |
755 | le32_to_cpu(info->truncate_seq), | ||
756 | le64_to_cpu(info->truncate_size), | ||
757 | le64_to_cpu(info->size)); | ||
758 | /* only update max_size on auth cap */ | ||
759 | if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) && | ||
760 | ci->i_max_size != le64_to_cpu(info->max_size)) { | ||
761 | dout("max_size %lld -> %llu\n", ci->i_max_size, | ||
762 | le64_to_cpu(info->max_size)); | ||
763 | ci->i_max_size = le64_to_cpu(info->max_size); | ||
764 | } | ||
765 | } | ||
664 | 766 | ||
665 | /* xattrs */ | 767 | /* xattrs */ |
666 | /* note that if i_xattrs.len <= 4, i_xattrs.data will still be NULL. */ | 768 | /* note that if i_xattrs.len <= 4, i_xattrs.data will still be NULL. */ |
@@ -745,58 +847,6 @@ static int fill_inode(struct inode *inode, | |||
745 | dout(" marking %p complete (empty)\n", inode); | 847 | dout(" marking %p complete (empty)\n", inode); |
746 | __ceph_dir_set_complete(ci, atomic_read(&ci->i_release_count)); | 848 | __ceph_dir_set_complete(ci, atomic_read(&ci->i_release_count)); |
747 | } | 849 | } |
748 | no_change: | ||
749 | /* only update max_size on auth cap */ | ||
750 | if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) && | ||
751 | ci->i_max_size != le64_to_cpu(info->max_size)) { | ||
752 | dout("max_size %lld -> %llu\n", ci->i_max_size, | ||
753 | le64_to_cpu(info->max_size)); | ||
754 | ci->i_max_size = le64_to_cpu(info->max_size); | ||
755 | } | ||
756 | |||
757 | spin_unlock(&ci->i_ceph_lock); | ||
758 | |||
759 | /* queue truncate if we saw i_size decrease */ | ||
760 | if (queue_trunc) | ||
761 | ceph_queue_vmtruncate(inode); | ||
762 | |||
763 | /* populate frag tree */ | ||
764 | /* FIXME: move me up, if/when version reflects fragtree changes */ | ||
765 | nsplits = le32_to_cpu(info->fragtree.nsplits); | ||
766 | mutex_lock(&ci->i_fragtree_mutex); | ||
767 | rb_node = rb_first(&ci->i_fragtree); | ||
768 | for (i = 0; i < nsplits; i++) { | ||
769 | u32 id = le32_to_cpu(info->fragtree.splits[i].frag); | ||
770 | frag = NULL; | ||
771 | while (rb_node) { | ||
772 | frag = rb_entry(rb_node, struct ceph_inode_frag, node); | ||
773 | if (ceph_frag_compare(frag->frag, id) >= 0) { | ||
774 | if (frag->frag != id) | ||
775 | frag = NULL; | ||
776 | else | ||
777 | rb_node = rb_next(rb_node); | ||
778 | break; | ||
779 | } | ||
780 | rb_node = rb_next(rb_node); | ||
781 | rb_erase(&frag->node, &ci->i_fragtree); | ||
782 | kfree(frag); | ||
783 | frag = NULL; | ||
784 | } | ||
785 | if (!frag) { | ||
786 | frag = __get_or_create_frag(ci, id); | ||
787 | if (IS_ERR(frag)) | ||
788 | continue; | ||
789 | } | ||
790 | frag->split_by = le32_to_cpu(info->fragtree.splits[i].by); | ||
791 | dout(" frag %x split by %d\n", frag->frag, frag->split_by); | ||
792 | } | ||
793 | while (rb_node) { | ||
794 | frag = rb_entry(rb_node, struct ceph_inode_frag, node); | ||
795 | rb_node = rb_next(rb_node); | ||
796 | rb_erase(&frag->node, &ci->i_fragtree); | ||
797 | kfree(frag); | ||
798 | } | ||
799 | mutex_unlock(&ci->i_fragtree_mutex); | ||
800 | 850 | ||
801 | /* were we issued a capability? */ | 851 | /* were we issued a capability? */ |
802 | if (info->cap.caps) { | 852 | if (info->cap.caps) { |
@@ -809,30 +859,41 @@ no_change: | |||
809 | le32_to_cpu(info->cap.seq), | 859 | le32_to_cpu(info->cap.seq), |
810 | le32_to_cpu(info->cap.mseq), | 860 | le32_to_cpu(info->cap.mseq), |
811 | le64_to_cpu(info->cap.realm), | 861 | le64_to_cpu(info->cap.realm), |
812 | info->cap.flags, | 862 | info->cap.flags, &new_cap); |
813 | caps_reservation); | 863 | wake = true; |
814 | } else { | 864 | } else { |
815 | spin_lock(&ci->i_ceph_lock); | ||
816 | dout(" %p got snap_caps %s\n", inode, | 865 | dout(" %p got snap_caps %s\n", inode, |
817 | ceph_cap_string(le32_to_cpu(info->cap.caps))); | 866 | ceph_cap_string(le32_to_cpu(info->cap.caps))); |
818 | ci->i_snap_caps |= le32_to_cpu(info->cap.caps); | 867 | ci->i_snap_caps |= le32_to_cpu(info->cap.caps); |
819 | if (cap_fmode >= 0) | 868 | if (cap_fmode >= 0) |
820 | __ceph_get_fmode(ci, cap_fmode); | 869 | __ceph_get_fmode(ci, cap_fmode); |
821 | spin_unlock(&ci->i_ceph_lock); | ||
822 | } | 870 | } |
823 | } else if (cap_fmode >= 0) { | 871 | } else if (cap_fmode >= 0) { |
824 | pr_warn("mds issued no caps on %llx.%llx\n", | 872 | pr_warn("mds issued no caps on %llx.%llx\n", |
825 | ceph_vinop(inode)); | 873 | ceph_vinop(inode)); |
826 | __ceph_get_fmode(ci, cap_fmode); | 874 | __ceph_get_fmode(ci, cap_fmode); |
827 | } | 875 | } |
876 | spin_unlock(&ci->i_ceph_lock); | ||
877 | |||
878 | if (wake) | ||
879 | wake_up_all(&ci->i_cap_wq); | ||
880 | |||
881 | /* queue truncate if we saw i_size decrease */ | ||
882 | if (queue_trunc) | ||
883 | ceph_queue_vmtruncate(inode); | ||
884 | |||
885 | /* populate frag tree */ | ||
886 | if (S_ISDIR(inode->i_mode)) | ||
887 | ceph_fill_fragtree(inode, &info->fragtree, dirinfo); | ||
828 | 888 | ||
829 | /* update delegation info? */ | 889 | /* update delegation info? */ |
830 | if (dirinfo) | 890 | if (dirinfo) |
831 | ceph_fill_dirfrag(inode, dirinfo); | 891 | ceph_fill_dirfrag(inode, dirinfo); |
832 | 892 | ||
833 | err = 0; | 893 | err = 0; |
834 | |||
835 | out: | 894 | out: |
895 | if (new_cap) | ||
896 | ceph_put_cap(mdsc, new_cap); | ||
836 | if (xattr_blob) | 897 | if (xattr_blob) |
837 | ceph_buffer_put(xattr_blob); | 898 | ceph_buffer_put(xattr_blob); |
838 | return err; | 899 | return err; |
@@ -1485,7 +1546,7 @@ static void ceph_invalidate_work(struct work_struct *work) | |||
1485 | orig_gen = ci->i_rdcache_gen; | 1546 | orig_gen = ci->i_rdcache_gen; |
1486 | spin_unlock(&ci->i_ceph_lock); | 1547 | spin_unlock(&ci->i_ceph_lock); |
1487 | 1548 | ||
1488 | truncate_inode_pages(inode->i_mapping, 0); | 1549 | truncate_pagecache(inode, 0); |
1489 | 1550 | ||
1490 | spin_lock(&ci->i_ceph_lock); | 1551 | spin_lock(&ci->i_ceph_lock); |
1491 | if (orig_gen == ci->i_rdcache_gen && | 1552 | if (orig_gen == ci->i_rdcache_gen && |
@@ -1588,7 +1649,7 @@ retry: | |||
1588 | ci->i_truncate_pending, to); | 1649 | ci->i_truncate_pending, to); |
1589 | spin_unlock(&ci->i_ceph_lock); | 1650 | spin_unlock(&ci->i_ceph_lock); |
1590 | 1651 | ||
1591 | truncate_inode_pages(inode->i_mapping, to); | 1652 | truncate_pagecache(inode, to); |
1592 | 1653 | ||
1593 | spin_lock(&ci->i_ceph_lock); | 1654 | spin_lock(&ci->i_ceph_lock); |
1594 | if (to == ci->i_truncate_size) { | 1655 | if (to == ci->i_truncate_size) { |
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 9a33b98cb000..92a2548278fc 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c | |||
@@ -1558,6 +1558,8 @@ ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode) | |||
1558 | init_completion(&req->r_safe_completion); | 1558 | init_completion(&req->r_safe_completion); |
1559 | INIT_LIST_HEAD(&req->r_unsafe_item); | 1559 | INIT_LIST_HEAD(&req->r_unsafe_item); |
1560 | 1560 | ||
1561 | req->r_stamp = CURRENT_TIME; | ||
1562 | |||
1561 | req->r_op = op; | 1563 | req->r_op = op; |
1562 | req->r_direct_mode = mode; | 1564 | req->r_direct_mode = mode; |
1563 | return req; | 1565 | return req; |
@@ -1783,7 +1785,8 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, | |||
1783 | } | 1785 | } |
1784 | 1786 | ||
1785 | len = sizeof(*head) + | 1787 | len = sizeof(*head) + |
1786 | pathlen1 + pathlen2 + 2*(1 + sizeof(u32) + sizeof(u64)); | 1788 | pathlen1 + pathlen2 + 2*(1 + sizeof(u32) + sizeof(u64)) + |
1789 | sizeof(struct timespec); | ||
1787 | 1790 | ||
1788 | /* calculate (max) length for cap releases */ | 1791 | /* calculate (max) length for cap releases */ |
1789 | len += sizeof(struct ceph_mds_request_release) * | 1792 | len += sizeof(struct ceph_mds_request_release) * |
@@ -1800,6 +1803,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, | |||
1800 | goto out_free2; | 1803 | goto out_free2; |
1801 | } | 1804 | } |
1802 | 1805 | ||
1806 | msg->hdr.version = 2; | ||
1803 | msg->hdr.tid = cpu_to_le64(req->r_tid); | 1807 | msg->hdr.tid = cpu_to_le64(req->r_tid); |
1804 | 1808 | ||
1805 | head = msg->front.iov_base; | 1809 | head = msg->front.iov_base; |
@@ -1836,6 +1840,9 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, | |||
1836 | mds, req->r_old_inode_drop, req->r_old_inode_unless, 0); | 1840 | mds, req->r_old_inode_drop, req->r_old_inode_unless, 0); |
1837 | head->num_releases = cpu_to_le16(releases); | 1841 | head->num_releases = cpu_to_le16(releases); |
1838 | 1842 | ||
1843 | /* time stamp */ | ||
1844 | ceph_encode_copy(&p, &req->r_stamp, sizeof(req->r_stamp)); | ||
1845 | |||
1839 | BUG_ON(p > end); | 1846 | BUG_ON(p > end); |
1840 | msg->front.iov_len = p - msg->front.iov_base; | 1847 | msg->front.iov_len = p - msg->front.iov_base; |
1841 | msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); | 1848 | msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); |
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index e90cfccf93bd..e00737cf523c 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h | |||
@@ -194,6 +194,7 @@ struct ceph_mds_request { | |||
194 | int r_fmode; /* file mode, if expecting cap */ | 194 | int r_fmode; /* file mode, if expecting cap */ |
195 | kuid_t r_uid; | 195 | kuid_t r_uid; |
196 | kgid_t r_gid; | 196 | kgid_t r_gid; |
197 | struct timespec r_stamp; | ||
197 | 198 | ||
198 | /* for choosing which mds to send this request to */ | 199 | /* for choosing which mds to send this request to */ |
199 | int r_direct_mode; | 200 | int r_direct_mode; |
diff --git a/fs/ceph/super.h b/fs/ceph/super.h index ead05cc1f447..12b20744e386 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h | |||
@@ -292,7 +292,6 @@ struct ceph_inode_info { | |||
292 | struct ceph_snap_context *i_head_snapc; /* set if wr_buffer_head > 0 or | 292 | struct ceph_snap_context *i_head_snapc; /* set if wr_buffer_head > 0 or |
293 | dirty|flushing caps */ | 293 | dirty|flushing caps */ |
294 | unsigned i_snap_caps; /* cap bits for snapped files */ | 294 | unsigned i_snap_caps; /* cap bits for snapped files */ |
295 | unsigned i_cap_exporting_issued; | ||
296 | 295 | ||
297 | int i_nr_by_mode[CEPH_FILE_MODE_NUM]; /* open file counts */ | 296 | int i_nr_by_mode[CEPH_FILE_MODE_NUM]; /* open file counts */ |
298 | 297 | ||
@@ -775,11 +774,13 @@ static inline void ceph_forget_all_cached_acls(struct inode *inode) | |||
775 | extern const char *ceph_cap_string(int c); | 774 | extern const char *ceph_cap_string(int c); |
776 | extern void ceph_handle_caps(struct ceph_mds_session *session, | 775 | extern void ceph_handle_caps(struct ceph_mds_session *session, |
777 | struct ceph_msg *msg); | 776 | struct ceph_msg *msg); |
778 | extern int ceph_add_cap(struct inode *inode, | 777 | extern struct ceph_cap *ceph_get_cap(struct ceph_mds_client *mdsc, |
779 | struct ceph_mds_session *session, u64 cap_id, | 778 | struct ceph_cap_reservation *ctx); |
780 | int fmode, unsigned issued, unsigned wanted, | 779 | extern void ceph_add_cap(struct inode *inode, |
781 | unsigned cap, unsigned seq, u64 realmino, int flags, | 780 | struct ceph_mds_session *session, u64 cap_id, |
782 | struct ceph_cap_reservation *caps_reservation); | 781 | int fmode, unsigned issued, unsigned wanted, |
782 | unsigned cap, unsigned seq, u64 realmino, int flags, | ||
783 | struct ceph_cap **new_cap); | ||
783 | extern void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release); | 784 | extern void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release); |
784 | extern void ceph_put_cap(struct ceph_mds_client *mdsc, | 785 | extern void ceph_put_cap(struct ceph_mds_client *mdsc, |
785 | struct ceph_cap *cap); | 786 | struct ceph_cap *cap); |