diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-05-16 19:24:01 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-05-16 19:24:01 -0400 |
commit | 1d9d7cbf28a1c2f84f2a0224466f8eb5f0a62ace (patch) | |
tree | 35aa9ec8433f757073f21e1229e97d736b0c5593 /fs/ceph | |
parent | 2c45e7fbc962be1b03f2c2af817a76f5ba810af2 (diff) | |
parent | 00abf69dd24f4444d185982379c5cc3bb7b6d1fc (diff) |
Merge tag 'ceph-for-5.2-rc1' of git://github.com/ceph/ceph-client
Pull ceph updates from Ilya Dryomov:
"On the filesystem side we have:
- a fix to enforce quotas set above the mount point (Luis Henriques)
- support for exporting snapshots through NFS (Zheng Yan)
- proper statx implementation (Jeff Layton). statx flags are mapped
to MDS caps, with AT_STATX_{DONT,FORCE}_SYNC taken into account.
- some follow-up dentry name handling fixes, in particular
elimination of our hand-rolled helper and the switch to __getname()
as suggested by Al (Jeff Layton)
- a set of MDS client cleanups in preparation for async MDS requests
in the future (Jeff Layton)
- a fix to sync the filesystem before remounting (Jeff Layton)
On the rbd side, work is on-going on object-map and fast-diff image
features"
* tag 'ceph-for-5.2-rc1' of git://github.com/ceph/ceph-client: (29 commits)
ceph: flush dirty inodes before proceeding with remount
ceph: fix unaligned access in ceph_send_cap_releases
libceph: make ceph_pr_addr take an struct ceph_entity_addr pointer
libceph: fix unaligned accesses in ceph_entity_addr handling
rbd: don't assert on writes to snapshots
rbd: client_mutex is never nested
ceph: print inode number in __caps_issued_mask debugging messages
ceph: just call get_session in __ceph_lookup_mds_session
ceph: simplify arguments and return semantics of try_get_cap_refs
ceph: fix comment over ceph_drop_caps_for_unlink
ceph: move wait for mds request into helper function
ceph: have ceph_mdsc_do_request call ceph_mdsc_submit_request
ceph: after an MDS request, do callback and completions
ceph: use pathlen values returned by set_request_path_attr
ceph: use __getname/__putname in ceph_mdsc_build_path
ceph: use ceph_mdsc_build_path instead of clone_dentry_name
ceph: fix potential use-after-free in ceph_mdsc_build_path
ceph: dump granular cap info in "caps" debugfs file
ceph: make iterate_session_caps a public symbol
ceph: fix NULL pointer deref when debugging is enabled
...
Diffstat (limited to 'fs/ceph')
-rw-r--r-- | fs/ceph/caps.c | 93 | ||||
-rw-r--r-- | fs/ceph/debugfs.c | 40 | ||||
-rw-r--r-- | fs/ceph/export.c | 356 | ||||
-rw-r--r-- | fs/ceph/file.c | 2 | ||||
-rw-r--r-- | fs/ceph/inode.c | 85 | ||||
-rw-r--r-- | fs/ceph/locks.c | 13 | ||||
-rw-r--r-- | fs/ceph/mds_client.c | 205 | ||||
-rw-r--r-- | fs/ceph/mds_client.h | 33 | ||||
-rw-r--r-- | fs/ceph/mdsmap.c | 2 | ||||
-rw-r--r-- | fs/ceph/quota.c | 177 | ||||
-rw-r--r-- | fs/ceph/super.c | 7 | ||||
-rw-r--r-- | fs/ceph/super.h | 2 |
12 files changed, 751 insertions(+), 264 deletions(-)
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 36a8dc699448..72f8e1311392 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
@@ -892,8 +892,8 @@ int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int touch) | |||
892 | int have = ci->i_snap_caps; | 892 | int have = ci->i_snap_caps; |
893 | 893 | ||
894 | if ((have & mask) == mask) { | 894 | if ((have & mask) == mask) { |
895 | dout("__ceph_caps_issued_mask %p snap issued %s" | 895 | dout("__ceph_caps_issued_mask ino 0x%lx snap issued %s" |
896 | " (mask %s)\n", &ci->vfs_inode, | 896 | " (mask %s)\n", ci->vfs_inode.i_ino, |
897 | ceph_cap_string(have), | 897 | ceph_cap_string(have), |
898 | ceph_cap_string(mask)); | 898 | ceph_cap_string(mask)); |
899 | return 1; | 899 | return 1; |
@@ -904,8 +904,8 @@ int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int touch) | |||
904 | if (!__cap_is_valid(cap)) | 904 | if (!__cap_is_valid(cap)) |
905 | continue; | 905 | continue; |
906 | if ((cap->issued & mask) == mask) { | 906 | if ((cap->issued & mask) == mask) { |
907 | dout("__ceph_caps_issued_mask %p cap %p issued %s" | 907 | dout("__ceph_caps_issued_mask ino 0x%lx cap %p issued %s" |
908 | " (mask %s)\n", &ci->vfs_inode, cap, | 908 | " (mask %s)\n", ci->vfs_inode.i_ino, cap, |
909 | ceph_cap_string(cap->issued), | 909 | ceph_cap_string(cap->issued), |
910 | ceph_cap_string(mask)); | 910 | ceph_cap_string(mask)); |
911 | if (touch) | 911 | if (touch) |
@@ -916,8 +916,8 @@ int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int touch) | |||
916 | /* does a combination of caps satisfy mask? */ | 916 | /* does a combination of caps satisfy mask? */ |
917 | have |= cap->issued; | 917 | have |= cap->issued; |
918 | if ((have & mask) == mask) { | 918 | if ((have & mask) == mask) { |
919 | dout("__ceph_caps_issued_mask %p combo issued %s" | 919 | dout("__ceph_caps_issued_mask ino 0x%lx combo issued %s" |
920 | " (mask %s)\n", &ci->vfs_inode, | 920 | " (mask %s)\n", ci->vfs_inode.i_ino, |
921 | ceph_cap_string(cap->issued), | 921 | ceph_cap_string(cap->issued), |
922 | ceph_cap_string(mask)); | 922 | ceph_cap_string(mask)); |
923 | if (touch) { | 923 | if (touch) { |
@@ -2257,8 +2257,6 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync) | |||
2257 | if (datasync) | 2257 | if (datasync) |
2258 | goto out; | 2258 | goto out; |
2259 | 2259 | ||
2260 | inode_lock(inode); | ||
2261 | |||
2262 | dirty = try_flush_caps(inode, &flush_tid); | 2260 | dirty = try_flush_caps(inode, &flush_tid); |
2263 | dout("fsync dirty caps are %s\n", ceph_cap_string(dirty)); | 2261 | dout("fsync dirty caps are %s\n", ceph_cap_string(dirty)); |
2264 | 2262 | ||
@@ -2273,7 +2271,6 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync) | |||
2273 | ret = wait_event_interruptible(ci->i_cap_wq, | 2271 | ret = wait_event_interruptible(ci->i_cap_wq, |
2274 | caps_are_flushed(inode, flush_tid)); | 2272 | caps_are_flushed(inode, flush_tid)); |
2275 | } | 2273 | } |
2276 | inode_unlock(inode); | ||
2277 | out: | 2274 | out: |
2278 | dout("fsync %p%s result=%d\n", inode, datasync ? " datasync" : "", ret); | 2275 | dout("fsync %p%s result=%d\n", inode, datasync ? " datasync" : "", ret); |
2279 | return ret; | 2276 | return ret; |
@@ -2528,9 +2525,14 @@ static void __take_cap_refs(struct ceph_inode_info *ci, int got, | |||
2528 | * to (when applicable), and check against max_size here as well. | 2525 | * to (when applicable), and check against max_size here as well. |
2529 | * Note that caller is responsible for ensuring max_size increases are | 2526 | * Note that caller is responsible for ensuring max_size increases are |
2530 | * requested from the MDS. | 2527 | * requested from the MDS. |
2528 | * | ||
2529 | * Returns 0 if caps were not able to be acquired (yet), a 1 if they were, | ||
2530 | * or a negative error code. | ||
2531 | * | ||
2532 | * FIXME: how does a 0 return differ from -EAGAIN? | ||
2531 | */ | 2533 | */ |
2532 | static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, | 2534 | static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, |
2533 | loff_t endoff, bool nonblock, int *got, int *err) | 2535 | loff_t endoff, bool nonblock, int *got) |
2534 | { | 2536 | { |
2535 | struct inode *inode = &ci->vfs_inode; | 2537 | struct inode *inode = &ci->vfs_inode; |
2536 | struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; | 2538 | struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; |
@@ -2550,8 +2552,7 @@ again: | |||
2550 | if ((file_wanted & need) != need) { | 2552 | if ((file_wanted & need) != need) { |
2551 | dout("try_get_cap_refs need %s file_wanted %s, EBADF\n", | 2553 | dout("try_get_cap_refs need %s file_wanted %s, EBADF\n", |
2552 | ceph_cap_string(need), ceph_cap_string(file_wanted)); | 2554 | ceph_cap_string(need), ceph_cap_string(file_wanted)); |
2553 | *err = -EBADF; | 2555 | ret = -EBADF; |
2554 | ret = 1; | ||
2555 | goto out_unlock; | 2556 | goto out_unlock; |
2556 | } | 2557 | } |
2557 | 2558 | ||
@@ -2572,10 +2573,8 @@ again: | |||
2572 | if (endoff >= 0 && endoff > (loff_t)ci->i_max_size) { | 2573 | if (endoff >= 0 && endoff > (loff_t)ci->i_max_size) { |
2573 | dout("get_cap_refs %p endoff %llu > maxsize %llu\n", | 2574 | dout("get_cap_refs %p endoff %llu > maxsize %llu\n", |
2574 | inode, endoff, ci->i_max_size); | 2575 | inode, endoff, ci->i_max_size); |
2575 | if (endoff > ci->i_requested_max_size) { | 2576 | if (endoff > ci->i_requested_max_size) |
2576 | *err = -EAGAIN; | 2577 | ret = -EAGAIN; |
2577 | ret = 1; | ||
2578 | } | ||
2579 | goto out_unlock; | 2578 | goto out_unlock; |
2580 | } | 2579 | } |
2581 | /* | 2580 | /* |
@@ -2610,8 +2609,7 @@ again: | |||
2610 | * task isn't in TASK_RUNNING state | 2609 | * task isn't in TASK_RUNNING state |
2611 | */ | 2610 | */ |
2612 | if (nonblock) { | 2611 | if (nonblock) { |
2613 | *err = -EAGAIN; | 2612 | ret = -EAGAIN; |
2614 | ret = 1; | ||
2615 | goto out_unlock; | 2613 | goto out_unlock; |
2616 | } | 2614 | } |
2617 | 2615 | ||
@@ -2640,8 +2638,7 @@ again: | |||
2640 | if (session_readonly) { | 2638 | if (session_readonly) { |
2641 | dout("get_cap_refs %p needed %s but mds%d readonly\n", | 2639 | dout("get_cap_refs %p needed %s but mds%d readonly\n", |
2642 | inode, ceph_cap_string(need), ci->i_auth_cap->mds); | 2640 | inode, ceph_cap_string(need), ci->i_auth_cap->mds); |
2643 | *err = -EROFS; | 2641 | ret = -EROFS; |
2644 | ret = 1; | ||
2645 | goto out_unlock; | 2642 | goto out_unlock; |
2646 | } | 2643 | } |
2647 | 2644 | ||
@@ -2650,16 +2647,14 @@ again: | |||
2650 | if (READ_ONCE(mdsc->fsc->mount_state) == | 2647 | if (READ_ONCE(mdsc->fsc->mount_state) == |
2651 | CEPH_MOUNT_SHUTDOWN) { | 2648 | CEPH_MOUNT_SHUTDOWN) { |
2652 | dout("get_cap_refs %p forced umount\n", inode); | 2649 | dout("get_cap_refs %p forced umount\n", inode); |
2653 | *err = -EIO; | 2650 | ret = -EIO; |
2654 | ret = 1; | ||
2655 | goto out_unlock; | 2651 | goto out_unlock; |
2656 | } | 2652 | } |
2657 | mds_wanted = __ceph_caps_mds_wanted(ci, false); | 2653 | mds_wanted = __ceph_caps_mds_wanted(ci, false); |
2658 | if (need & ~(mds_wanted & need)) { | 2654 | if (need & ~(mds_wanted & need)) { |
2659 | dout("get_cap_refs %p caps were dropped" | 2655 | dout("get_cap_refs %p caps were dropped" |
2660 | " (session killed?)\n", inode); | 2656 | " (session killed?)\n", inode); |
2661 | *err = -ESTALE; | 2657 | ret = -ESTALE; |
2662 | ret = 1; | ||
2663 | goto out_unlock; | 2658 | goto out_unlock; |
2664 | } | 2659 | } |
2665 | if (!(file_wanted & ~mds_wanted)) | 2660 | if (!(file_wanted & ~mds_wanted)) |
@@ -2710,7 +2705,7 @@ static void check_max_size(struct inode *inode, loff_t endoff) | |||
2710 | int ceph_try_get_caps(struct ceph_inode_info *ci, int need, int want, | 2705 | int ceph_try_get_caps(struct ceph_inode_info *ci, int need, int want, |
2711 | bool nonblock, int *got) | 2706 | bool nonblock, int *got) |
2712 | { | 2707 | { |
2713 | int ret, err = 0; | 2708 | int ret; |
2714 | 2709 | ||
2715 | BUG_ON(need & ~CEPH_CAP_FILE_RD); | 2710 | BUG_ON(need & ~CEPH_CAP_FILE_RD); |
2716 | BUG_ON(want & ~(CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO|CEPH_CAP_FILE_SHARED)); | 2711 | BUG_ON(want & ~(CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO|CEPH_CAP_FILE_SHARED)); |
@@ -2718,15 +2713,8 @@ int ceph_try_get_caps(struct ceph_inode_info *ci, int need, int want, | |||
2718 | if (ret < 0) | 2713 | if (ret < 0) |
2719 | return ret; | 2714 | return ret; |
2720 | 2715 | ||
2721 | ret = try_get_cap_refs(ci, need, want, 0, nonblock, got, &err); | 2716 | ret = try_get_cap_refs(ci, need, want, 0, nonblock, got); |
2722 | if (ret) { | 2717 | return ret == -EAGAIN ? 0 : ret; |
2723 | if (err == -EAGAIN) { | ||
2724 | ret = 0; | ||
2725 | } else if (err < 0) { | ||
2726 | ret = err; | ||
2727 | } | ||
2728 | } | ||
2729 | return ret; | ||
2730 | } | 2718 | } |
2731 | 2719 | ||
2732 | /* | 2720 | /* |
@@ -2737,7 +2725,7 @@ int ceph_try_get_caps(struct ceph_inode_info *ci, int need, int want, | |||
2737 | int ceph_get_caps(struct ceph_inode_info *ci, int need, int want, | 2725 | int ceph_get_caps(struct ceph_inode_info *ci, int need, int want, |
2738 | loff_t endoff, int *got, struct page **pinned_page) | 2726 | loff_t endoff, int *got, struct page **pinned_page) |
2739 | { | 2727 | { |
2740 | int _got, ret, err = 0; | 2728 | int _got, ret; |
2741 | 2729 | ||
2742 | ret = ceph_pool_perm_check(ci, need); | 2730 | ret = ceph_pool_perm_check(ci, need); |
2743 | if (ret < 0) | 2731 | if (ret < 0) |
@@ -2747,21 +2735,19 @@ int ceph_get_caps(struct ceph_inode_info *ci, int need, int want, | |||
2747 | if (endoff > 0) | 2735 | if (endoff > 0) |
2748 | check_max_size(&ci->vfs_inode, endoff); | 2736 | check_max_size(&ci->vfs_inode, endoff); |
2749 | 2737 | ||
2750 | err = 0; | ||
2751 | _got = 0; | 2738 | _got = 0; |
2752 | ret = try_get_cap_refs(ci, need, want, endoff, | 2739 | ret = try_get_cap_refs(ci, need, want, endoff, |
2753 | false, &_got, &err); | 2740 | false, &_got); |
2754 | if (ret) { | 2741 | if (ret == -EAGAIN) { |
2755 | if (err == -EAGAIN) | 2742 | continue; |
2756 | continue; | 2743 | } else if (!ret) { |
2757 | if (err < 0) | 2744 | int err; |
2758 | ret = err; | 2745 | |
2759 | } else { | ||
2760 | DEFINE_WAIT_FUNC(wait, woken_wake_function); | 2746 | DEFINE_WAIT_FUNC(wait, woken_wake_function); |
2761 | add_wait_queue(&ci->i_cap_wq, &wait); | 2747 | add_wait_queue(&ci->i_cap_wq, &wait); |
2762 | 2748 | ||
2763 | while (!try_get_cap_refs(ci, need, want, endoff, | 2749 | while (!(err = try_get_cap_refs(ci, need, want, endoff, |
2764 | true, &_got, &err)) { | 2750 | true, &_got))) { |
2765 | if (signal_pending(current)) { | 2751 | if (signal_pending(current)) { |
2766 | ret = -ERESTARTSYS; | 2752 | ret = -ERESTARTSYS; |
2767 | break; | 2753 | break; |
@@ -2770,19 +2756,14 @@ int ceph_get_caps(struct ceph_inode_info *ci, int need, int want, | |||
2770 | } | 2756 | } |
2771 | 2757 | ||
2772 | remove_wait_queue(&ci->i_cap_wq, &wait); | 2758 | remove_wait_queue(&ci->i_cap_wq, &wait); |
2773 | |||
2774 | if (err == -EAGAIN) | 2759 | if (err == -EAGAIN) |
2775 | continue; | 2760 | continue; |
2776 | if (err < 0) | ||
2777 | ret = err; | ||
2778 | } | 2761 | } |
2779 | if (ret < 0) { | 2762 | if (ret == -ESTALE) { |
2780 | if (err == -ESTALE) { | 2763 | /* session was killed, try renew caps */ |
2781 | /* session was killed, try renew caps */ | 2764 | ret = ceph_renew_caps(&ci->vfs_inode); |
2782 | ret = ceph_renew_caps(&ci->vfs_inode); | 2765 | if (ret == 0) |
2783 | if (ret == 0) | 2766 | continue; |
2784 | continue; | ||
2785 | } | ||
2786 | return ret; | 2767 | return ret; |
2787 | } | 2768 | } |
2788 | 2769 | ||
@@ -4099,7 +4080,7 @@ void ceph_put_fmode(struct ceph_inode_info *ci, int fmode) | |||
4099 | } | 4080 | } |
4100 | 4081 | ||
4101 | /* | 4082 | /* |
4102 | * For a soon-to-be unlinked file, drop the AUTH_RDCACHE caps. If it | 4083 | * For a soon-to-be unlinked file, drop the LINK caps. If it |
4103 | * looks like the link count will hit 0, drop any other caps (other | 4084 | * looks like the link count will hit 0, drop any other caps (other |
4104 | * than PIN) we don't specifically want (due to the file still being | 4085 | * than PIN) we don't specifically want (due to the file still being |
4105 | * open). | 4086 | * open). |
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index 98365e74cb4a..b3fc5fe26a1a 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c | |||
@@ -37,7 +37,7 @@ static int mdsmap_show(struct seq_file *s, void *p) | |||
37 | struct ceph_entity_addr *addr = &mdsmap->m_info[i].addr; | 37 | struct ceph_entity_addr *addr = &mdsmap->m_info[i].addr; |
38 | int state = mdsmap->m_info[i].state; | 38 | int state = mdsmap->m_info[i].state; |
39 | seq_printf(s, "\tmds%d\t%s\t(%s)\n", i, | 39 | seq_printf(s, "\tmds%d\t%s\t(%s)\n", i, |
40 | ceph_pr_addr(&addr->in_addr), | 40 | ceph_pr_addr(addr), |
41 | ceph_mds_state_name(state)); | 41 | ceph_mds_state_name(state)); |
42 | } | 42 | } |
43 | return 0; | 43 | return 0; |
@@ -88,7 +88,7 @@ static int mdsc_show(struct seq_file *s, void *p) | |||
88 | req->r_dentry, | 88 | req->r_dentry, |
89 | path ? path : ""); | 89 | path ? path : ""); |
90 | spin_unlock(&req->r_dentry->d_lock); | 90 | spin_unlock(&req->r_dentry->d_lock); |
91 | kfree(path); | 91 | ceph_mdsc_free_path(path, pathlen); |
92 | } else if (req->r_path1) { | 92 | } else if (req->r_path1) { |
93 | seq_printf(s, " #%llx/%s", req->r_ino1.ino, | 93 | seq_printf(s, " #%llx/%s", req->r_ino1.ino, |
94 | req->r_path1); | 94 | req->r_path1); |
@@ -108,7 +108,7 @@ static int mdsc_show(struct seq_file *s, void *p) | |||
108 | req->r_old_dentry, | 108 | req->r_old_dentry, |
109 | path ? path : ""); | 109 | path ? path : ""); |
110 | spin_unlock(&req->r_old_dentry->d_lock); | 110 | spin_unlock(&req->r_old_dentry->d_lock); |
111 | kfree(path); | 111 | ceph_mdsc_free_path(path, pathlen); |
112 | } else if (req->r_path2 && req->r_op != CEPH_MDS_OP_SYMLINK) { | 112 | } else if (req->r_path2 && req->r_op != CEPH_MDS_OP_SYMLINK) { |
113 | if (req->r_ino2.ino) | 113 | if (req->r_ino2.ino) |
114 | seq_printf(s, " #%llx/%s", req->r_ino2.ino, | 114 | seq_printf(s, " #%llx/%s", req->r_ino2.ino, |
@@ -124,18 +124,48 @@ static int mdsc_show(struct seq_file *s, void *p) | |||
124 | return 0; | 124 | return 0; |
125 | } | 125 | } |
126 | 126 | ||
127 | static int caps_show_cb(struct inode *inode, struct ceph_cap *cap, void *p) | ||
128 | { | ||
129 | struct seq_file *s = p; | ||
130 | |||
131 | seq_printf(s, "0x%-17lx%-17s%-17s\n", inode->i_ino, | ||
132 | ceph_cap_string(cap->issued), | ||
133 | ceph_cap_string(cap->implemented)); | ||
134 | return 0; | ||
135 | } | ||
136 | |||
127 | static int caps_show(struct seq_file *s, void *p) | 137 | static int caps_show(struct seq_file *s, void *p) |
128 | { | 138 | { |
129 | struct ceph_fs_client *fsc = s->private; | 139 | struct ceph_fs_client *fsc = s->private; |
130 | int total, avail, used, reserved, min; | 140 | struct ceph_mds_client *mdsc = fsc->mdsc; |
141 | int total, avail, used, reserved, min, i; | ||
131 | 142 | ||
132 | ceph_reservation_status(fsc, &total, &avail, &used, &reserved, &min); | 143 | ceph_reservation_status(fsc, &total, &avail, &used, &reserved, &min); |
133 | seq_printf(s, "total\t\t%d\n" | 144 | seq_printf(s, "total\t\t%d\n" |
134 | "avail\t\t%d\n" | 145 | "avail\t\t%d\n" |
135 | "used\t\t%d\n" | 146 | "used\t\t%d\n" |
136 | "reserved\t%d\n" | 147 | "reserved\t%d\n" |
137 | "min\t%d\n", | 148 | "min\t\t%d\n\n", |
138 | total, avail, used, reserved, min); | 149 | total, avail, used, reserved, min); |
150 | seq_printf(s, "ino issued implemented\n"); | ||
151 | seq_printf(s, "-----------------------------------------------\n"); | ||
152 | |||
153 | mutex_lock(&mdsc->mutex); | ||
154 | for (i = 0; i < mdsc->max_sessions; i++) { | ||
155 | struct ceph_mds_session *session; | ||
156 | |||
157 | session = __ceph_lookup_mds_session(mdsc, i); | ||
158 | if (!session) | ||
159 | continue; | ||
160 | mutex_unlock(&mdsc->mutex); | ||
161 | mutex_lock(&session->s_mutex); | ||
162 | ceph_iterate_session_caps(session, caps_show_cb, s); | ||
163 | mutex_unlock(&session->s_mutex); | ||
164 | ceph_put_mds_session(session); | ||
165 | mutex_lock(&mdsc->mutex); | ||
166 | } | ||
167 | mutex_unlock(&mdsc->mutex); | ||
168 | |||
139 | return 0; | 169 | return 0; |
140 | } | 170 | } |
141 | 171 | ||
diff --git a/fs/ceph/export.c b/fs/ceph/export.c index 3c59ad180ef0..d3ef7ee429ec 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c | |||
@@ -22,18 +22,77 @@ struct ceph_nfs_confh { | |||
22 | u64 ino, parent_ino; | 22 | u64 ino, parent_ino; |
23 | } __attribute__ ((packed)); | 23 | } __attribute__ ((packed)); |
24 | 24 | ||
25 | /* | ||
26 | * fh for snapped inode | ||
27 | */ | ||
28 | struct ceph_nfs_snapfh { | ||
29 | u64 ino; | ||
30 | u64 snapid; | ||
31 | u64 parent_ino; | ||
32 | u32 hash; | ||
33 | } __attribute__ ((packed)); | ||
34 | |||
35 | static int ceph_encode_snapfh(struct inode *inode, u32 *rawfh, int *max_len, | ||
36 | struct inode *parent_inode) | ||
37 | { | ||
38 | const static int snap_handle_length = | ||
39 | sizeof(struct ceph_nfs_snapfh) >> 2; | ||
40 | struct ceph_nfs_snapfh *sfh = (void *)rawfh; | ||
41 | u64 snapid = ceph_snap(inode); | ||
42 | int ret; | ||
43 | bool no_parent = true; | ||
44 | |||
45 | if (*max_len < snap_handle_length) { | ||
46 | *max_len = snap_handle_length; | ||
47 | ret = FILEID_INVALID; | ||
48 | goto out; | ||
49 | } | ||
50 | |||
51 | ret = -EINVAL; | ||
52 | if (snapid != CEPH_SNAPDIR) { | ||
53 | struct inode *dir; | ||
54 | struct dentry *dentry = d_find_alias(inode); | ||
55 | if (!dentry) | ||
56 | goto out; | ||
57 | |||
58 | rcu_read_lock(); | ||
59 | dir = d_inode_rcu(dentry->d_parent); | ||
60 | if (ceph_snap(dir) != CEPH_SNAPDIR) { | ||
61 | sfh->parent_ino = ceph_ino(dir); | ||
62 | sfh->hash = ceph_dentry_hash(dir, dentry); | ||
63 | no_parent = false; | ||
64 | } | ||
65 | rcu_read_unlock(); | ||
66 | dput(dentry); | ||
67 | } | ||
68 | |||
69 | if (no_parent) { | ||
70 | if (!S_ISDIR(inode->i_mode)) | ||
71 | goto out; | ||
72 | sfh->parent_ino = sfh->ino; | ||
73 | sfh->hash = 0; | ||
74 | } | ||
75 | sfh->ino = ceph_ino(inode); | ||
76 | sfh->snapid = snapid; | ||
77 | |||
78 | *max_len = snap_handle_length; | ||
79 | ret = FILEID_BTRFS_WITH_PARENT; | ||
80 | out: | ||
81 | dout("encode_snapfh %llx.%llx ret=%d\n", ceph_vinop(inode), ret); | ||
82 | return ret; | ||
83 | } | ||
84 | |||
25 | static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len, | 85 | static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len, |
26 | struct inode *parent_inode) | 86 | struct inode *parent_inode) |
27 | { | 87 | { |
88 | const static int handle_length = | ||
89 | sizeof(struct ceph_nfs_fh) >> 2; | ||
90 | const static int connected_handle_length = | ||
91 | sizeof(struct ceph_nfs_confh) >> 2; | ||
28 | int type; | 92 | int type; |
29 | struct ceph_nfs_fh *fh = (void *)rawfh; | ||
30 | struct ceph_nfs_confh *cfh = (void *)rawfh; | ||
31 | int connected_handle_length = sizeof(*cfh)/4; | ||
32 | int handle_length = sizeof(*fh)/4; | ||
33 | 93 | ||
34 | /* don't re-export snaps */ | ||
35 | if (ceph_snap(inode) != CEPH_NOSNAP) | 94 | if (ceph_snap(inode) != CEPH_NOSNAP) |
36 | return -EINVAL; | 95 | return ceph_encode_snapfh(inode, rawfh, max_len, parent_inode); |
37 | 96 | ||
38 | if (parent_inode && (*max_len < connected_handle_length)) { | 97 | if (parent_inode && (*max_len < connected_handle_length)) { |
39 | *max_len = connected_handle_length; | 98 | *max_len = connected_handle_length; |
@@ -44,6 +103,7 @@ static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len, | |||
44 | } | 103 | } |
45 | 104 | ||
46 | if (parent_inode) { | 105 | if (parent_inode) { |
106 | struct ceph_nfs_confh *cfh = (void *)rawfh; | ||
47 | dout("encode_fh %llx with parent %llx\n", | 107 | dout("encode_fh %llx with parent %llx\n", |
48 | ceph_ino(inode), ceph_ino(parent_inode)); | 108 | ceph_ino(inode), ceph_ino(parent_inode)); |
49 | cfh->ino = ceph_ino(inode); | 109 | cfh->ino = ceph_ino(inode); |
@@ -51,6 +111,7 @@ static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len, | |||
51 | *max_len = connected_handle_length; | 111 | *max_len = connected_handle_length; |
52 | type = FILEID_INO32_GEN_PARENT; | 112 | type = FILEID_INO32_GEN_PARENT; |
53 | } else { | 113 | } else { |
114 | struct ceph_nfs_fh *fh = (void *)rawfh; | ||
54 | dout("encode_fh %llx\n", ceph_ino(inode)); | 115 | dout("encode_fh %llx\n", ceph_ino(inode)); |
55 | fh->ino = ceph_ino(inode); | 116 | fh->ino = ceph_ino(inode); |
56 | *max_len = handle_length; | 117 | *max_len = handle_length; |
@@ -59,7 +120,7 @@ static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len, | |||
59 | return type; | 120 | return type; |
60 | } | 121 | } |
61 | 122 | ||
62 | static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino) | 123 | static struct inode *__lookup_inode(struct super_block *sb, u64 ino) |
63 | { | 124 | { |
64 | struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc; | 125 | struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc; |
65 | struct inode *inode; | 126 | struct inode *inode; |
@@ -81,7 +142,7 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino) | |||
81 | mask = CEPH_STAT_CAP_INODE; | 142 | mask = CEPH_STAT_CAP_INODE; |
82 | if (ceph_security_xattr_wanted(d_inode(sb->s_root))) | 143 | if (ceph_security_xattr_wanted(d_inode(sb->s_root))) |
83 | mask |= CEPH_CAP_XATTR_SHARED; | 144 | mask |= CEPH_CAP_XATTR_SHARED; |
84 | req->r_args.getattr.mask = cpu_to_le32(mask); | 145 | req->r_args.lookupino.mask = cpu_to_le32(mask); |
85 | 146 | ||
86 | req->r_ino1 = vino; | 147 | req->r_ino1 = vino; |
87 | req->r_num_caps = 1; | 148 | req->r_num_caps = 1; |
@@ -91,16 +152,114 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino) | |||
91 | ihold(inode); | 152 | ihold(inode); |
92 | ceph_mdsc_put_request(req); | 153 | ceph_mdsc_put_request(req); |
93 | if (!inode) | 154 | if (!inode) |
94 | return ERR_PTR(-ESTALE); | 155 | return err < 0 ? ERR_PTR(err) : ERR_PTR(-ESTALE); |
95 | if (inode->i_nlink == 0) { | ||
96 | iput(inode); | ||
97 | return ERR_PTR(-ESTALE); | ||
98 | } | ||
99 | } | 156 | } |
157 | return inode; | ||
158 | } | ||
159 | |||
160 | struct inode *ceph_lookup_inode(struct super_block *sb, u64 ino) | ||
161 | { | ||
162 | struct inode *inode = __lookup_inode(sb, ino); | ||
163 | if (IS_ERR(inode)) | ||
164 | return inode; | ||
165 | if (inode->i_nlink == 0) { | ||
166 | iput(inode); | ||
167 | return ERR_PTR(-ESTALE); | ||
168 | } | ||
169 | return inode; | ||
170 | } | ||
100 | 171 | ||
172 | static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino) | ||
173 | { | ||
174 | struct inode *inode = __lookup_inode(sb, ino); | ||
175 | if (IS_ERR(inode)) | ||
176 | return ERR_CAST(inode); | ||
177 | if (inode->i_nlink == 0) { | ||
178 | iput(inode); | ||
179 | return ERR_PTR(-ESTALE); | ||
180 | } | ||
101 | return d_obtain_alias(inode); | 181 | return d_obtain_alias(inode); |
102 | } | 182 | } |
103 | 183 | ||
184 | static struct dentry *__snapfh_to_dentry(struct super_block *sb, | ||
185 | struct ceph_nfs_snapfh *sfh, | ||
186 | bool want_parent) | ||
187 | { | ||
188 | struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc; | ||
189 | struct ceph_mds_request *req; | ||
190 | struct inode *inode; | ||
191 | struct ceph_vino vino; | ||
192 | int mask; | ||
193 | int err; | ||
194 | bool unlinked = false; | ||
195 | |||
196 | if (want_parent) { | ||
197 | vino.ino = sfh->parent_ino; | ||
198 | if (sfh->snapid == CEPH_SNAPDIR) | ||
199 | vino.snap = CEPH_NOSNAP; | ||
200 | else if (sfh->ino == sfh->parent_ino) | ||
201 | vino.snap = CEPH_SNAPDIR; | ||
202 | else | ||
203 | vino.snap = sfh->snapid; | ||
204 | } else { | ||
205 | vino.ino = sfh->ino; | ||
206 | vino.snap = sfh->snapid; | ||
207 | } | ||
208 | inode = ceph_find_inode(sb, vino); | ||
209 | if (inode) | ||
210 | return d_obtain_alias(inode); | ||
211 | |||
212 | req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPINO, | ||
213 | USE_ANY_MDS); | ||
214 | if (IS_ERR(req)) | ||
215 | return ERR_CAST(req); | ||
216 | |||
217 | mask = CEPH_STAT_CAP_INODE; | ||
218 | if (ceph_security_xattr_wanted(d_inode(sb->s_root))) | ||
219 | mask |= CEPH_CAP_XATTR_SHARED; | ||
220 | req->r_args.lookupino.mask = cpu_to_le32(mask); | ||
221 | if (vino.snap < CEPH_NOSNAP) { | ||
222 | req->r_args.lookupino.snapid = cpu_to_le64(vino.snap); | ||
223 | if (!want_parent && sfh->ino != sfh->parent_ino) { | ||
224 | req->r_args.lookupino.parent = | ||
225 | cpu_to_le64(sfh->parent_ino); | ||
226 | req->r_args.lookupino.hash = | ||
227 | cpu_to_le32(sfh->hash); | ||
228 | } | ||
229 | } | ||
230 | |||
231 | req->r_ino1 = vino; | ||
232 | req->r_num_caps = 1; | ||
233 | err = ceph_mdsc_do_request(mdsc, NULL, req); | ||
234 | inode = req->r_target_inode; | ||
235 | if (inode) { | ||
236 | if (vino.snap == CEPH_SNAPDIR) { | ||
237 | if (inode->i_nlink == 0) | ||
238 | unlinked = true; | ||
239 | inode = ceph_get_snapdir(inode); | ||
240 | } else if (ceph_snap(inode) == vino.snap) { | ||
241 | ihold(inode); | ||
242 | } else { | ||
243 | /* mds does not support lookup snapped inode */ | ||
244 | err = -EOPNOTSUPP; | ||
245 | inode = NULL; | ||
246 | } | ||
247 | } | ||
248 | ceph_mdsc_put_request(req); | ||
249 | |||
250 | if (want_parent) { | ||
251 | dout("snapfh_to_parent %llx.%llx\n err=%d\n", | ||
252 | vino.ino, vino.snap, err); | ||
253 | } else { | ||
254 | dout("snapfh_to_dentry %llx.%llx parent %llx hash %x err=%d", | ||
255 | vino.ino, vino.snap, sfh->parent_ino, sfh->hash, err); | ||
256 | } | ||
257 | if (!inode) | ||
258 | return ERR_PTR(-ESTALE); | ||
259 | /* see comments in ceph_get_parent() */ | ||
260 | return unlinked ? d_obtain_root(inode) : d_obtain_alias(inode); | ||
261 | } | ||
262 | |||
104 | /* | 263 | /* |
105 | * convert regular fh to dentry | 264 | * convert regular fh to dentry |
106 | */ | 265 | */ |
@@ -110,6 +269,11 @@ static struct dentry *ceph_fh_to_dentry(struct super_block *sb, | |||
110 | { | 269 | { |
111 | struct ceph_nfs_fh *fh = (void *)fid->raw; | 270 | struct ceph_nfs_fh *fh = (void *)fid->raw; |
112 | 271 | ||
272 | if (fh_type == FILEID_BTRFS_WITH_PARENT) { | ||
273 | struct ceph_nfs_snapfh *sfh = (void *)fid->raw; | ||
274 | return __snapfh_to_dentry(sb, sfh, false); | ||
275 | } | ||
276 | |||
113 | if (fh_type != FILEID_INO32_GEN && | 277 | if (fh_type != FILEID_INO32_GEN && |
114 | fh_type != FILEID_INO32_GEN_PARENT) | 278 | fh_type != FILEID_INO32_GEN_PARENT) |
115 | return NULL; | 279 | return NULL; |
@@ -163,13 +327,49 @@ static struct dentry *__get_parent(struct super_block *sb, | |||
163 | 327 | ||
164 | static struct dentry *ceph_get_parent(struct dentry *child) | 328 | static struct dentry *ceph_get_parent(struct dentry *child) |
165 | { | 329 | { |
166 | /* don't re-export snaps */ | 330 | struct inode *inode = d_inode(child); |
167 | if (ceph_snap(d_inode(child)) != CEPH_NOSNAP) | 331 | struct dentry *dn; |
168 | return ERR_PTR(-EINVAL); | 332 | |
169 | 333 | if (ceph_snap(inode) != CEPH_NOSNAP) { | |
170 | dout("get_parent %p ino %llx.%llx\n", | 334 | struct inode* dir; |
171 | child, ceph_vinop(d_inode(child))); | 335 | bool unlinked = false; |
172 | return __get_parent(child->d_sb, child, 0); | 336 | /* do not support non-directory */ |
337 | if (!d_is_dir(child)) { | ||
338 | dn = ERR_PTR(-EINVAL); | ||
339 | goto out; | ||
340 | } | ||
341 | dir = __lookup_inode(inode->i_sb, ceph_ino(inode)); | ||
342 | if (IS_ERR(dir)) { | ||
343 | dn = ERR_CAST(dir); | ||
344 | goto out; | ||
345 | } | ||
346 | /* There can be multiple paths to access snapped inode. | ||
347 | * For simplicity, treat snapdir of head inode as parent */ | ||
348 | if (ceph_snap(inode) != CEPH_SNAPDIR) { | ||
349 | struct inode *snapdir = ceph_get_snapdir(dir); | ||
350 | if (dir->i_nlink == 0) | ||
351 | unlinked = true; | ||
352 | iput(dir); | ||
353 | if (IS_ERR(snapdir)) { | ||
354 | dn = ERR_CAST(snapdir); | ||
355 | goto out; | ||
356 | } | ||
357 | dir = snapdir; | ||
358 | } | ||
359 | /* If directory has already been deleted, futher get_parent | ||
360 | * will fail. Do not mark snapdir dentry as disconnected, | ||
361 | * this prevent exportfs from doing futher get_parent. */ | ||
362 | if (unlinked) | ||
363 | dn = d_obtain_root(dir); | ||
364 | else | ||
365 | dn = d_obtain_alias(dir); | ||
366 | } else { | ||
367 | dn = __get_parent(child->d_sb, child, 0); | ||
368 | } | ||
369 | out: | ||
370 | dout("get_parent %p ino %llx.%llx err=%ld\n", | ||
371 | child, ceph_vinop(inode), (IS_ERR(dn) ? PTR_ERR(dn) : 0)); | ||
372 | return dn; | ||
173 | } | 373 | } |
174 | 374 | ||
175 | /* | 375 | /* |
@@ -182,6 +382,11 @@ static struct dentry *ceph_fh_to_parent(struct super_block *sb, | |||
182 | struct ceph_nfs_confh *cfh = (void *)fid->raw; | 382 | struct ceph_nfs_confh *cfh = (void *)fid->raw; |
183 | struct dentry *dentry; | 383 | struct dentry *dentry; |
184 | 384 | ||
385 | if (fh_type == FILEID_BTRFS_WITH_PARENT) { | ||
386 | struct ceph_nfs_snapfh *sfh = (void *)fid->raw; | ||
387 | return __snapfh_to_dentry(sb, sfh, true); | ||
388 | } | ||
389 | |||
185 | if (fh_type != FILEID_INO32_GEN_PARENT) | 390 | if (fh_type != FILEID_INO32_GEN_PARENT) |
186 | return NULL; | 391 | return NULL; |
187 | if (fh_len < sizeof(*cfh) / 4) | 392 | if (fh_len < sizeof(*cfh) / 4) |
@@ -194,14 +399,115 @@ static struct dentry *ceph_fh_to_parent(struct super_block *sb, | |||
194 | return dentry; | 399 | return dentry; |
195 | } | 400 | } |
196 | 401 | ||
402 | static int __get_snap_name(struct dentry *parent, char *name, | ||
403 | struct dentry *child) | ||
404 | { | ||
405 | struct inode *inode = d_inode(child); | ||
406 | struct inode *dir = d_inode(parent); | ||
407 | struct ceph_fs_client *fsc = ceph_inode_to_client(inode); | ||
408 | struct ceph_mds_request *req = NULL; | ||
409 | char *last_name = NULL; | ||
410 | unsigned next_offset = 2; | ||
411 | int err = -EINVAL; | ||
412 | |||
413 | if (ceph_ino(inode) != ceph_ino(dir)) | ||
414 | goto out; | ||
415 | if (ceph_snap(inode) == CEPH_SNAPDIR) { | ||
416 | if (ceph_snap(dir) == CEPH_NOSNAP) { | ||
417 | strcpy(name, fsc->mount_options->snapdir_name); | ||
418 | err = 0; | ||
419 | } | ||
420 | goto out; | ||
421 | } | ||
422 | if (ceph_snap(dir) != CEPH_SNAPDIR) | ||
423 | goto out; | ||
424 | |||
425 | while (1) { | ||
426 | struct ceph_mds_reply_info_parsed *rinfo; | ||
427 | struct ceph_mds_reply_dir_entry *rde; | ||
428 | int i; | ||
429 | |||
430 | req = ceph_mdsc_create_request(fsc->mdsc, CEPH_MDS_OP_LSSNAP, | ||
431 | USE_AUTH_MDS); | ||
432 | if (IS_ERR(req)) { | ||
433 | err = PTR_ERR(req); | ||
434 | req = NULL; | ||
435 | goto out; | ||
436 | } | ||
437 | err = ceph_alloc_readdir_reply_buffer(req, inode); | ||
438 | if (err) | ||
439 | goto out; | ||
440 | |||
441 | req->r_direct_mode = USE_AUTH_MDS; | ||
442 | req->r_readdir_offset = next_offset; | ||
443 | req->r_args.readdir.flags = | ||
444 | cpu_to_le16(CEPH_READDIR_REPLY_BITFLAGS); | ||
445 | if (last_name) { | ||
446 | req->r_path2 = last_name; | ||
447 | last_name = NULL; | ||
448 | } | ||
449 | |||
450 | req->r_inode = dir; | ||
451 | ihold(dir); | ||
452 | req->r_dentry = dget(parent); | ||
453 | |||
454 | inode_lock(dir); | ||
455 | err = ceph_mdsc_do_request(fsc->mdsc, NULL, req); | ||
456 | inode_unlock(dir); | ||
457 | |||
458 | if (err < 0) | ||
459 | goto out; | ||
460 | |||
461 | rinfo = &req->r_reply_info; | ||
462 | for (i = 0; i < rinfo->dir_nr; i++) { | ||
463 | rde = rinfo->dir_entries + i; | ||
464 | BUG_ON(!rde->inode.in); | ||
465 | if (ceph_snap(inode) == | ||
466 | le64_to_cpu(rde->inode.in->snapid)) { | ||
467 | memcpy(name, rde->name, rde->name_len); | ||
468 | name[rde->name_len] = '\0'; | ||
469 | err = 0; | ||
470 | goto out; | ||
471 | } | ||
472 | } | ||
473 | |||
474 | if (rinfo->dir_end) | ||
475 | break; | ||
476 | |||
477 | BUG_ON(rinfo->dir_nr <= 0); | ||
478 | rde = rinfo->dir_entries + (rinfo->dir_nr - 1); | ||
479 | next_offset += rinfo->dir_nr; | ||
480 | last_name = kstrndup(rde->name, rde->name_len, GFP_KERNEL); | ||
481 | if (!last_name) { | ||
482 | err = -ENOMEM; | ||
483 | goto out; | ||
484 | } | ||
485 | |||
486 | ceph_mdsc_put_request(req); | ||
487 | req = NULL; | ||
488 | } | ||
489 | err = -ENOENT; | ||
490 | out: | ||
491 | if (req) | ||
492 | ceph_mdsc_put_request(req); | ||
493 | kfree(last_name); | ||
494 | dout("get_snap_name %p ino %llx.%llx err=%d\n", | ||
495 | child, ceph_vinop(inode), err); | ||
496 | return err; | ||
497 | } | ||
498 | |||
197 | static int ceph_get_name(struct dentry *parent, char *name, | 499 | static int ceph_get_name(struct dentry *parent, char *name, |
198 | struct dentry *child) | 500 | struct dentry *child) |
199 | { | 501 | { |
200 | struct ceph_mds_client *mdsc; | 502 | struct ceph_mds_client *mdsc; |
201 | struct ceph_mds_request *req; | 503 | struct ceph_mds_request *req; |
504 | struct inode *inode = d_inode(child); | ||
202 | int err; | 505 | int err; |
203 | 506 | ||
204 | mdsc = ceph_inode_to_client(d_inode(child))->mdsc; | 507 | if (ceph_snap(inode) != CEPH_NOSNAP) |
508 | return __get_snap_name(parent, name, child); | ||
509 | |||
510 | mdsc = ceph_inode_to_client(inode)->mdsc; | ||
205 | req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPNAME, | 511 | req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPNAME, |
206 | USE_ANY_MDS); | 512 | USE_ANY_MDS); |
207 | if (IS_ERR(req)) | 513 | if (IS_ERR(req)) |
@@ -209,8 +515,8 @@ static int ceph_get_name(struct dentry *parent, char *name, | |||
209 | 515 | ||
210 | inode_lock(d_inode(parent)); | 516 | inode_lock(d_inode(parent)); |
211 | 517 | ||
212 | req->r_inode = d_inode(child); | 518 | req->r_inode = inode; |
213 | ihold(d_inode(child)); | 519 | ihold(inode); |
214 | req->r_ino2 = ceph_vino(d_inode(parent)); | 520 | req->r_ino2 = ceph_vino(d_inode(parent)); |
215 | req->r_parent = d_inode(parent); | 521 | req->r_parent = d_inode(parent); |
216 | set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags); | 522 | set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags); |
@@ -224,10 +530,10 @@ static int ceph_get_name(struct dentry *parent, char *name, | |||
224 | memcpy(name, rinfo->dname, rinfo->dname_len); | 530 | memcpy(name, rinfo->dname, rinfo->dname_len); |
225 | name[rinfo->dname_len] = 0; | 531 | name[rinfo->dname_len] = 0; |
226 | dout("get_name %p ino %llx.%llx name %s\n", | 532 | dout("get_name %p ino %llx.%llx name %s\n", |
227 | child, ceph_vinop(d_inode(child)), name); | 533 | child, ceph_vinop(inode), name); |
228 | } else { | 534 | } else { |
229 | dout("get_name %p ino %llx.%llx err %d\n", | 535 | dout("get_name %p ino %llx.%llx err %d\n", |
230 | child, ceph_vinop(d_inode(child)), err); | 536 | child, ceph_vinop(inode), err); |
231 | } | 537 | } |
232 | 538 | ||
233 | ceph_mdsc_put_request(req); | 539 | ceph_mdsc_put_request(req); |
diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 84725b53ac21..305daf043eb0 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c | |||
@@ -929,7 +929,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, | |||
929 | 929 | ||
930 | dout("sync_direct_%s on file %p %lld~%u snapc %p seq %lld\n", | 930 | dout("sync_direct_%s on file %p %lld~%u snapc %p seq %lld\n", |
931 | (write ? "write" : "read"), file, pos, (unsigned)count, | 931 | (write ? "write" : "read"), file, pos, (unsigned)count, |
932 | snapc, snapc->seq); | 932 | snapc, snapc ? snapc->seq : 0); |
933 | 933 | ||
934 | ret = filemap_write_and_wait_range(inode->i_mapping, | 934 | ret = filemap_write_and_wait_range(inode->i_mapping, |
935 | pos, pos + count - 1); | 935 | pos, pos + count - 1); |
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 35dae6d5493a..f85355bf49c4 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c | |||
@@ -2266,43 +2266,72 @@ int ceph_permission(struct inode *inode, int mask) | |||
2266 | return err; | 2266 | return err; |
2267 | } | 2267 | } |
2268 | 2268 | ||
2269 | /* Craft a mask of needed caps given a set of requested statx attrs. */ | ||
2270 | static int statx_to_caps(u32 want) | ||
2271 | { | ||
2272 | int mask = 0; | ||
2273 | |||
2274 | if (want & (STATX_MODE|STATX_UID|STATX_GID|STATX_CTIME)) | ||
2275 | mask |= CEPH_CAP_AUTH_SHARED; | ||
2276 | |||
2277 | if (want & (STATX_NLINK|STATX_CTIME)) | ||
2278 | mask |= CEPH_CAP_LINK_SHARED; | ||
2279 | |||
2280 | if (want & (STATX_ATIME|STATX_MTIME|STATX_CTIME|STATX_SIZE| | ||
2281 | STATX_BLOCKS)) | ||
2282 | mask |= CEPH_CAP_FILE_SHARED; | ||
2283 | |||
2284 | if (want & (STATX_CTIME)) | ||
2285 | mask |= CEPH_CAP_XATTR_SHARED; | ||
2286 | |||
2287 | return mask; | ||
2288 | } | ||
2289 | |||
2269 | /* | 2290 | /* |
2270 | * Get all attributes. Hopefully somedata we'll have a statlite() | 2291 | * Get all the attributes. If we have sufficient caps for the requested attrs, |
2271 | * and can limit the fields we require to be accurate. | 2292 | * then we can avoid talking to the MDS at all. |
2272 | */ | 2293 | */ |
2273 | int ceph_getattr(const struct path *path, struct kstat *stat, | 2294 | int ceph_getattr(const struct path *path, struct kstat *stat, |
2274 | u32 request_mask, unsigned int flags) | 2295 | u32 request_mask, unsigned int flags) |
2275 | { | 2296 | { |
2276 | struct inode *inode = d_inode(path->dentry); | 2297 | struct inode *inode = d_inode(path->dentry); |
2277 | struct ceph_inode_info *ci = ceph_inode(inode); | 2298 | struct ceph_inode_info *ci = ceph_inode(inode); |
2278 | int err; | 2299 | int err = 0; |
2279 | 2300 | ||
2280 | err = ceph_do_getattr(inode, CEPH_STAT_CAP_INODE_ALL, false); | 2301 | /* Skip the getattr altogether if we're asked not to sync */ |
2281 | if (!err) { | 2302 | if (!(flags & AT_STATX_DONT_SYNC)) { |
2282 | generic_fillattr(inode, stat); | 2303 | err = ceph_do_getattr(inode, statx_to_caps(request_mask), |
2283 | stat->ino = ceph_translate_ino(inode->i_sb, inode->i_ino); | 2304 | flags & AT_STATX_FORCE_SYNC); |
2284 | if (ceph_snap(inode) == CEPH_NOSNAP) | 2305 | if (err) |
2285 | stat->dev = inode->i_sb->s_dev; | 2306 | return err; |
2307 | } | ||
2308 | |||
2309 | generic_fillattr(inode, stat); | ||
2310 | stat->ino = ceph_translate_ino(inode->i_sb, inode->i_ino); | ||
2311 | if (ceph_snap(inode) == CEPH_NOSNAP) | ||
2312 | stat->dev = inode->i_sb->s_dev; | ||
2313 | else | ||
2314 | stat->dev = ci->i_snapid_map ? ci->i_snapid_map->dev : 0; | ||
2315 | |||
2316 | if (S_ISDIR(inode->i_mode)) { | ||
2317 | if (ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), | ||
2318 | RBYTES)) | ||
2319 | stat->size = ci->i_rbytes; | ||
2286 | else | 2320 | else |
2287 | stat->dev = ci->i_snapid_map ? ci->i_snapid_map->dev : 0; | 2321 | stat->size = ci->i_files + ci->i_subdirs; |
2288 | 2322 | stat->blocks = 0; | |
2289 | if (S_ISDIR(inode->i_mode)) { | 2323 | stat->blksize = 65536; |
2290 | if (ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), | 2324 | /* |
2291 | RBYTES)) | 2325 | * Some applications rely on the number of st_nlink |
2292 | stat->size = ci->i_rbytes; | 2326 | * value on directories to be either 0 (if unlinked) |
2293 | else | 2327 | * or 2 + number of subdirectories. |
2294 | stat->size = ci->i_files + ci->i_subdirs; | 2328 | */ |
2295 | stat->blocks = 0; | 2329 | if (stat->nlink == 1) |
2296 | stat->blksize = 65536; | 2330 | /* '.' + '..' + subdirs */ |
2297 | /* | 2331 | stat->nlink = 1 + 1 + ci->i_subdirs; |
2298 | * Some applications rely on the number of st_nlink | ||
2299 | * value on directories to be either 0 (if unlinked) | ||
2300 | * or 2 + number of subdirectories. | ||
2301 | */ | ||
2302 | if (stat->nlink == 1) | ||
2303 | /* '.' + '..' + subdirs */ | ||
2304 | stat->nlink = 1 + 1 + ci->i_subdirs; | ||
2305 | } | ||
2306 | } | 2332 | } |
2333 | |||
2334 | /* Mask off any higher bits (e.g. btime) until we have support */ | ||
2335 | stat->result_mask = request_mask & STATX_BASIC_STATS; | ||
2307 | return err; | 2336 | return err; |
2308 | } | 2337 | } |
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index 9dae2ec7e1fa..ac9b53b89365 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c | |||
@@ -237,15 +237,6 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) | |||
237 | spin_lock(&ci->i_ceph_lock); | 237 | spin_lock(&ci->i_ceph_lock); |
238 | if (ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) { | 238 | if (ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) { |
239 | err = -EIO; | 239 | err = -EIO; |
240 | } else if (op == CEPH_MDS_OP_SETFILELOCK) { | ||
241 | /* | ||
242 | * increasing i_filelock_ref closes race window between | ||
243 | * handling request reply and adding file_lock struct to | ||
244 | * inode. Otherwise, i_auth_cap may get trimmed in the | ||
245 | * window. Caller function will decrease the counter. | ||
246 | */ | ||
247 | fl->fl_ops = &ceph_fl_lock_ops; | ||
248 | atomic_inc(&ci->i_filelock_ref); | ||
249 | } | 240 | } |
250 | spin_unlock(&ci->i_ceph_lock); | 241 | spin_unlock(&ci->i_ceph_lock); |
251 | if (err < 0) { | 242 | if (err < 0) { |
@@ -299,10 +290,6 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl) | |||
299 | spin_lock(&ci->i_ceph_lock); | 290 | spin_lock(&ci->i_ceph_lock); |
300 | if (ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) { | 291 | if (ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) { |
301 | err = -EIO; | 292 | err = -EIO; |
302 | } else { | ||
303 | /* see comment in ceph_lock */ | ||
304 | fl->fl_ops = &ceph_fl_lock_ops; | ||
305 | atomic_inc(&ci->i_filelock_ref); | ||
306 | } | 293 | } |
307 | spin_unlock(&ci->i_ceph_lock); | 294 | spin_unlock(&ci->i_ceph_lock); |
308 | if (err < 0) { | 295 | if (err < 0) { |
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 9049c2a3e972..959b1bf7c327 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c | |||
@@ -550,15 +550,9 @@ void ceph_put_mds_session(struct ceph_mds_session *s) | |||
550 | struct ceph_mds_session *__ceph_lookup_mds_session(struct ceph_mds_client *mdsc, | 550 | struct ceph_mds_session *__ceph_lookup_mds_session(struct ceph_mds_client *mdsc, |
551 | int mds) | 551 | int mds) |
552 | { | 552 | { |
553 | struct ceph_mds_session *session; | ||
554 | |||
555 | if (mds >= mdsc->max_sessions || !mdsc->sessions[mds]) | 553 | if (mds >= mdsc->max_sessions || !mdsc->sessions[mds]) |
556 | return NULL; | 554 | return NULL; |
557 | session = mdsc->sessions[mds]; | 555 | return get_session(mdsc->sessions[mds]); |
558 | dout("lookup_mds_session %p %d\n", session, | ||
559 | refcount_read(&session->s_ref)); | ||
560 | get_session(session); | ||
561 | return session; | ||
562 | } | 556 | } |
563 | 557 | ||
564 | static bool __have_session(struct ceph_mds_client *mdsc, int mds) | 558 | static bool __have_session(struct ceph_mds_client *mdsc, int mds) |
@@ -1284,9 +1278,9 @@ static void cleanup_session_requests(struct ceph_mds_client *mdsc, | |||
1284 | * | 1278 | * |
1285 | * Caller must hold session s_mutex. | 1279 | * Caller must hold session s_mutex. |
1286 | */ | 1280 | */ |
1287 | static int iterate_session_caps(struct ceph_mds_session *session, | 1281 | int ceph_iterate_session_caps(struct ceph_mds_session *session, |
1288 | int (*cb)(struct inode *, struct ceph_cap *, | 1282 | int (*cb)(struct inode *, struct ceph_cap *, |
1289 | void *), void *arg) | 1283 | void *), void *arg) |
1290 | { | 1284 | { |
1291 | struct list_head *p; | 1285 | struct list_head *p; |
1292 | struct ceph_cap *cap; | 1286 | struct ceph_cap *cap; |
@@ -1451,7 +1445,7 @@ static void remove_session_caps(struct ceph_mds_session *session) | |||
1451 | LIST_HEAD(dispose); | 1445 | LIST_HEAD(dispose); |
1452 | 1446 | ||
1453 | dout("remove_session_caps on %p\n", session); | 1447 | dout("remove_session_caps on %p\n", session); |
1454 | iterate_session_caps(session, remove_session_caps_cb, fsc); | 1448 | ceph_iterate_session_caps(session, remove_session_caps_cb, fsc); |
1455 | 1449 | ||
1456 | wake_up_all(&fsc->mdsc->cap_flushing_wq); | 1450 | wake_up_all(&fsc->mdsc->cap_flushing_wq); |
1457 | 1451 | ||
@@ -1534,8 +1528,8 @@ static int wake_up_session_cb(struct inode *inode, struct ceph_cap *cap, | |||
1534 | static void wake_up_session_caps(struct ceph_mds_session *session, int ev) | 1528 | static void wake_up_session_caps(struct ceph_mds_session *session, int ev) |
1535 | { | 1529 | { |
1536 | dout("wake_up_session_caps %p mds%d\n", session, session->s_mds); | 1530 | dout("wake_up_session_caps %p mds%d\n", session, session->s_mds); |
1537 | iterate_session_caps(session, wake_up_session_cb, | 1531 | ceph_iterate_session_caps(session, wake_up_session_cb, |
1538 | (void *)(unsigned long)ev); | 1532 | (void *)(unsigned long)ev); |
1539 | } | 1533 | } |
1540 | 1534 | ||
1541 | /* | 1535 | /* |
@@ -1768,7 +1762,7 @@ int ceph_trim_caps(struct ceph_mds_client *mdsc, | |||
1768 | session->s_mds, session->s_nr_caps, max_caps, trim_caps); | 1762 | session->s_mds, session->s_nr_caps, max_caps, trim_caps); |
1769 | if (trim_caps > 0) { | 1763 | if (trim_caps > 0) { |
1770 | session->s_trim_caps = trim_caps; | 1764 | session->s_trim_caps = trim_caps; |
1771 | iterate_session_caps(session, trim_caps_cb, session); | 1765 | ceph_iterate_session_caps(session, trim_caps_cb, session); |
1772 | dout("trim_caps mds%d done: %d / %d, trimmed %d\n", | 1766 | dout("trim_caps mds%d done: %d / %d, trimmed %d\n", |
1773 | session->s_mds, session->s_nr_caps, max_caps, | 1767 | session->s_mds, session->s_nr_caps, max_caps, |
1774 | trim_caps - session->s_trim_caps); | 1768 | trim_caps - session->s_trim_caps); |
@@ -1861,7 +1855,8 @@ again: | |||
1861 | num_cap_releases--; | 1855 | num_cap_releases--; |
1862 | 1856 | ||
1863 | head = msg->front.iov_base; | 1857 | head = msg->front.iov_base; |
1864 | le32_add_cpu(&head->num, 1); | 1858 | put_unaligned_le32(get_unaligned_le32(&head->num) + 1, |
1859 | &head->num); | ||
1865 | item = msg->front.iov_base + msg->front.iov_len; | 1860 | item = msg->front.iov_base + msg->front.iov_len; |
1866 | item->ino = cpu_to_le64(cap->cap_ino); | 1861 | item->ino = cpu_to_le64(cap->cap_ino); |
1867 | item->cap_id = cpu_to_le64(cap->cap_id); | 1862 | item->cap_id = cpu_to_le64(cap->cap_id); |
@@ -2089,43 +2084,29 @@ static inline u64 __get_oldest_tid(struct ceph_mds_client *mdsc) | |||
2089 | * Encode hidden .snap dirs as a double /, i.e. | 2084 | * Encode hidden .snap dirs as a double /, i.e. |
2090 | * foo/.snap/bar -> foo//bar | 2085 | * foo/.snap/bar -> foo//bar |
2091 | */ | 2086 | */ |
2092 | char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *base, | 2087 | char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *pbase, |
2093 | int stop_on_nosnap) | 2088 | int stop_on_nosnap) |
2094 | { | 2089 | { |
2095 | struct dentry *temp; | 2090 | struct dentry *temp; |
2096 | char *path; | 2091 | char *path; |
2097 | int len, pos; | 2092 | int pos; |
2098 | unsigned seq; | 2093 | unsigned seq; |
2094 | u64 base; | ||
2099 | 2095 | ||
2100 | if (!dentry) | 2096 | if (!dentry) |
2101 | return ERR_PTR(-EINVAL); | 2097 | return ERR_PTR(-EINVAL); |
2102 | 2098 | ||
2103 | retry: | 2099 | path = __getname(); |
2104 | len = 0; | ||
2105 | seq = read_seqbegin(&rename_lock); | ||
2106 | rcu_read_lock(); | ||
2107 | for (temp = dentry; !IS_ROOT(temp);) { | ||
2108 | struct inode *inode = d_inode(temp); | ||
2109 | if (inode && ceph_snap(inode) == CEPH_SNAPDIR) | ||
2110 | len++; /* slash only */ | ||
2111 | else if (stop_on_nosnap && inode && | ||
2112 | ceph_snap(inode) == CEPH_NOSNAP) | ||
2113 | break; | ||
2114 | else | ||
2115 | len += 1 + temp->d_name.len; | ||
2116 | temp = temp->d_parent; | ||
2117 | } | ||
2118 | rcu_read_unlock(); | ||
2119 | if (len) | ||
2120 | len--; /* no leading '/' */ | ||
2121 | |||
2122 | path = kmalloc(len+1, GFP_NOFS); | ||
2123 | if (!path) | 2100 | if (!path) |
2124 | return ERR_PTR(-ENOMEM); | 2101 | return ERR_PTR(-ENOMEM); |
2125 | pos = len; | 2102 | retry: |
2126 | path[pos] = 0; /* trailing null */ | 2103 | pos = PATH_MAX - 1; |
2104 | path[pos] = '\0'; | ||
2105 | |||
2106 | seq = read_seqbegin(&rename_lock); | ||
2127 | rcu_read_lock(); | 2107 | rcu_read_lock(); |
2128 | for (temp = dentry; !IS_ROOT(temp) && pos != 0; ) { | 2108 | temp = dentry; |
2109 | for (;;) { | ||
2129 | struct inode *inode; | 2110 | struct inode *inode; |
2130 | 2111 | ||
2131 | spin_lock(&temp->d_lock); | 2112 | spin_lock(&temp->d_lock); |
@@ -2143,83 +2124,54 @@ retry: | |||
2143 | spin_unlock(&temp->d_lock); | 2124 | spin_unlock(&temp->d_lock); |
2144 | break; | 2125 | break; |
2145 | } | 2126 | } |
2146 | strncpy(path + pos, temp->d_name.name, | 2127 | memcpy(path + pos, temp->d_name.name, temp->d_name.len); |
2147 | temp->d_name.len); | ||
2148 | } | 2128 | } |
2149 | spin_unlock(&temp->d_lock); | 2129 | spin_unlock(&temp->d_lock); |
2150 | if (pos) | ||
2151 | path[--pos] = '/'; | ||
2152 | temp = temp->d_parent; | 2130 | temp = temp->d_parent; |
2131 | |||
2132 | /* Are we at the root? */ | ||
2133 | if (IS_ROOT(temp)) | ||
2134 | break; | ||
2135 | |||
2136 | /* Are we out of buffer? */ | ||
2137 | if (--pos < 0) | ||
2138 | break; | ||
2139 | |||
2140 | path[pos] = '/'; | ||
2153 | } | 2141 | } |
2142 | base = ceph_ino(d_inode(temp)); | ||
2154 | rcu_read_unlock(); | 2143 | rcu_read_unlock(); |
2155 | if (pos != 0 || read_seqretry(&rename_lock, seq)) { | 2144 | if (pos < 0 || read_seqretry(&rename_lock, seq)) { |
2156 | pr_err("build_path did not end path lookup where " | 2145 | pr_err("build_path did not end path lookup where " |
2157 | "expected, namelen is %d, pos is %d\n", len, pos); | 2146 | "expected, pos is %d\n", pos); |
2158 | /* presumably this is only possible if racing with a | 2147 | /* presumably this is only possible if racing with a |
2159 | rename of one of the parent directories (we can not | 2148 | rename of one of the parent directories (we can not |
2160 | lock the dentries above us to prevent this, but | 2149 | lock the dentries above us to prevent this, but |
2161 | retrying should be harmless) */ | 2150 | retrying should be harmless) */ |
2162 | kfree(path); | ||
2163 | goto retry; | 2151 | goto retry; |
2164 | } | 2152 | } |
2165 | 2153 | ||
2166 | *base = ceph_ino(d_inode(temp)); | 2154 | *pbase = base; |
2167 | *plen = len; | 2155 | *plen = PATH_MAX - 1 - pos; |
2168 | dout("build_path on %p %d built %llx '%.*s'\n", | 2156 | dout("build_path on %p %d built %llx '%.*s'\n", |
2169 | dentry, d_count(dentry), *base, len, path); | 2157 | dentry, d_count(dentry), base, *plen, path + pos); |
2170 | return path; | 2158 | return path + pos; |
2171 | } | ||
2172 | |||
2173 | /* Duplicate the dentry->d_name.name safely */ | ||
2174 | static int clone_dentry_name(struct dentry *dentry, const char **ppath, | ||
2175 | int *ppathlen) | ||
2176 | { | ||
2177 | u32 len; | ||
2178 | char *name; | ||
2179 | |||
2180 | retry: | ||
2181 | len = READ_ONCE(dentry->d_name.len); | ||
2182 | name = kmalloc(len + 1, GFP_NOFS); | ||
2183 | if (!name) | ||
2184 | return -ENOMEM; | ||
2185 | |||
2186 | spin_lock(&dentry->d_lock); | ||
2187 | if (dentry->d_name.len != len) { | ||
2188 | spin_unlock(&dentry->d_lock); | ||
2189 | kfree(name); | ||
2190 | goto retry; | ||
2191 | } | ||
2192 | memcpy(name, dentry->d_name.name, len); | ||
2193 | spin_unlock(&dentry->d_lock); | ||
2194 | |||
2195 | name[len] = '\0'; | ||
2196 | *ppath = name; | ||
2197 | *ppathlen = len; | ||
2198 | return 0; | ||
2199 | } | 2159 | } |
2200 | 2160 | ||
2201 | static int build_dentry_path(struct dentry *dentry, struct inode *dir, | 2161 | static int build_dentry_path(struct dentry *dentry, struct inode *dir, |
2202 | const char **ppath, int *ppathlen, u64 *pino, | 2162 | const char **ppath, int *ppathlen, u64 *pino, |
2203 | bool *pfreepath, bool parent_locked) | 2163 | bool *pfreepath, bool parent_locked) |
2204 | { | 2164 | { |
2205 | int ret; | ||
2206 | char *path; | 2165 | char *path; |
2207 | 2166 | ||
2208 | rcu_read_lock(); | 2167 | rcu_read_lock(); |
2209 | if (!dir) | 2168 | if (!dir) |
2210 | dir = d_inode_rcu(dentry->d_parent); | 2169 | dir = d_inode_rcu(dentry->d_parent); |
2211 | if (dir && ceph_snap(dir) == CEPH_NOSNAP) { | 2170 | if (dir && parent_locked && ceph_snap(dir) == CEPH_NOSNAP) { |
2212 | *pino = ceph_ino(dir); | 2171 | *pino = ceph_ino(dir); |
2213 | rcu_read_unlock(); | 2172 | rcu_read_unlock(); |
2214 | if (parent_locked) { | 2173 | *ppath = dentry->d_name.name; |
2215 | *ppath = dentry->d_name.name; | 2174 | *ppathlen = dentry->d_name.len; |
2216 | *ppathlen = dentry->d_name.len; | ||
2217 | } else { | ||
2218 | ret = clone_dentry_name(dentry, ppath, ppathlen); | ||
2219 | if (ret) | ||
2220 | return ret; | ||
2221 | *pfreepath = true; | ||
2222 | } | ||
2223 | return 0; | 2175 | return 0; |
2224 | } | 2176 | } |
2225 | rcu_read_unlock(); | 2177 | rcu_read_unlock(); |
@@ -2331,9 +2283,9 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, | |||
2331 | (!!req->r_inode_drop + !!req->r_dentry_drop + | 2283 | (!!req->r_inode_drop + !!req->r_dentry_drop + |
2332 | !!req->r_old_inode_drop + !!req->r_old_dentry_drop); | 2284 | !!req->r_old_inode_drop + !!req->r_old_dentry_drop); |
2333 | if (req->r_dentry_drop) | 2285 | if (req->r_dentry_drop) |
2334 | len += req->r_dentry->d_name.len; | 2286 | len += pathlen1; |
2335 | if (req->r_old_dentry_drop) | 2287 | if (req->r_old_dentry_drop) |
2336 | len += req->r_old_dentry->d_name.len; | 2288 | len += pathlen2; |
2337 | 2289 | ||
2338 | msg = ceph_msg_new2(CEPH_MSG_CLIENT_REQUEST, len, 1, GFP_NOFS, false); | 2290 | msg = ceph_msg_new2(CEPH_MSG_CLIENT_REQUEST, len, 1, GFP_NOFS, false); |
2339 | if (!msg) { | 2291 | if (!msg) { |
@@ -2410,10 +2362,10 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, | |||
2410 | 2362 | ||
2411 | out_free2: | 2363 | out_free2: |
2412 | if (freepath2) | 2364 | if (freepath2) |
2413 | kfree((char *)path2); | 2365 | ceph_mdsc_free_path((char *)path2, pathlen2); |
2414 | out_free1: | 2366 | out_free1: |
2415 | if (freepath1) | 2367 | if (freepath1) |
2416 | kfree((char *)path1); | 2368 | ceph_mdsc_free_path((char *)path1, pathlen1); |
2417 | out: | 2369 | out: |
2418 | return msg; | 2370 | return msg; |
2419 | } | 2371 | } |
@@ -2427,8 +2379,7 @@ static void complete_request(struct ceph_mds_client *mdsc, | |||
2427 | { | 2379 | { |
2428 | if (req->r_callback) | 2380 | if (req->r_callback) |
2429 | req->r_callback(mdsc, req); | 2381 | req->r_callback(mdsc, req); |
2430 | else | 2382 | complete_all(&req->r_completion); |
2431 | complete_all(&req->r_completion); | ||
2432 | } | 2383 | } |
2433 | 2384 | ||
2434 | /* | 2385 | /* |
@@ -2670,28 +2621,11 @@ static void kick_requests(struct ceph_mds_client *mdsc, int mds) | |||
2670 | } | 2621 | } |
2671 | } | 2622 | } |
2672 | 2623 | ||
2673 | void ceph_mdsc_submit_request(struct ceph_mds_client *mdsc, | 2624 | int ceph_mdsc_submit_request(struct ceph_mds_client *mdsc, struct inode *dir, |
2674 | struct ceph_mds_request *req) | 2625 | struct ceph_mds_request *req) |
2675 | { | 2626 | { |
2676 | dout("submit_request on %p\n", req); | ||
2677 | mutex_lock(&mdsc->mutex); | ||
2678 | __register_request(mdsc, req, NULL); | ||
2679 | __do_request(mdsc, req); | ||
2680 | mutex_unlock(&mdsc->mutex); | ||
2681 | } | ||
2682 | |||
2683 | /* | ||
2684 | * Synchrously perform an mds request. Take care of all of the | ||
2685 | * session setup, forwarding, retry details. | ||
2686 | */ | ||
2687 | int ceph_mdsc_do_request(struct ceph_mds_client *mdsc, | ||
2688 | struct inode *dir, | ||
2689 | struct ceph_mds_request *req) | ||
2690 | { | ||
2691 | int err; | 2627 | int err; |
2692 | 2628 | ||
2693 | dout("do_request on %p\n", req); | ||
2694 | |||
2695 | /* take CAP_PIN refs for r_inode, r_parent, r_old_dentry */ | 2629 | /* take CAP_PIN refs for r_inode, r_parent, r_old_dentry */ |
2696 | if (req->r_inode) | 2630 | if (req->r_inode) |
2697 | ceph_get_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN); | 2631 | ceph_get_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN); |
@@ -2701,18 +2635,21 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc, | |||
2701 | ceph_get_cap_refs(ceph_inode(req->r_old_dentry_dir), | 2635 | ceph_get_cap_refs(ceph_inode(req->r_old_dentry_dir), |
2702 | CEPH_CAP_PIN); | 2636 | CEPH_CAP_PIN); |
2703 | 2637 | ||
2704 | /* issue */ | 2638 | dout("submit_request on %p for inode %p\n", req, dir); |
2705 | mutex_lock(&mdsc->mutex); | 2639 | mutex_lock(&mdsc->mutex); |
2706 | __register_request(mdsc, req, dir); | 2640 | __register_request(mdsc, req, dir); |
2707 | __do_request(mdsc, req); | 2641 | __do_request(mdsc, req); |
2642 | err = req->r_err; | ||
2643 | mutex_unlock(&mdsc->mutex); | ||
2644 | return err; | ||
2645 | } | ||
2708 | 2646 | ||
2709 | if (req->r_err) { | 2647 | static int ceph_mdsc_wait_request(struct ceph_mds_client *mdsc, |
2710 | err = req->r_err; | 2648 | struct ceph_mds_request *req) |
2711 | goto out; | 2649 | { |
2712 | } | 2650 | int err; |
2713 | 2651 | ||
2714 | /* wait */ | 2652 | /* wait */ |
2715 | mutex_unlock(&mdsc->mutex); | ||
2716 | dout("do_request waiting\n"); | 2653 | dout("do_request waiting\n"); |
2717 | if (!req->r_timeout && req->r_wait_for_completion) { | 2654 | if (!req->r_timeout && req->r_wait_for_completion) { |
2718 | err = req->r_wait_for_completion(mdsc, req); | 2655 | err = req->r_wait_for_completion(mdsc, req); |
@@ -2753,8 +2690,26 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc, | |||
2753 | err = req->r_err; | 2690 | err = req->r_err; |
2754 | } | 2691 | } |
2755 | 2692 | ||
2756 | out: | ||
2757 | mutex_unlock(&mdsc->mutex); | 2693 | mutex_unlock(&mdsc->mutex); |
2694 | return err; | ||
2695 | } | ||
2696 | |||
2697 | /* | ||
2698 | * Synchrously perform an mds request. Take care of all of the | ||
2699 | * session setup, forwarding, retry details. | ||
2700 | */ | ||
2701 | int ceph_mdsc_do_request(struct ceph_mds_client *mdsc, | ||
2702 | struct inode *dir, | ||
2703 | struct ceph_mds_request *req) | ||
2704 | { | ||
2705 | int err; | ||
2706 | |||
2707 | dout("do_request on %p\n", req); | ||
2708 | |||
2709 | /* issue */ | ||
2710 | err = ceph_mdsc_submit_request(mdsc, dir, req); | ||
2711 | if (!err) | ||
2712 | err = ceph_mdsc_wait_request(mdsc, req); | ||
2758 | dout("do_request %p done, result %d\n", req, err); | 2713 | dout("do_request %p done, result %d\n", req, err); |
2759 | return err; | 2714 | return err; |
2760 | } | 2715 | } |
@@ -3485,7 +3440,7 @@ out_freeflocks: | |||
3485 | ceph_pagelist_encode_string(pagelist, path, pathlen); | 3440 | ceph_pagelist_encode_string(pagelist, path, pathlen); |
3486 | ceph_pagelist_append(pagelist, &rec, sizeof(rec.v1)); | 3441 | ceph_pagelist_append(pagelist, &rec, sizeof(rec.v1)); |
3487 | out_freepath: | 3442 | out_freepath: |
3488 | kfree(path); | 3443 | ceph_mdsc_free_path(path, pathlen); |
3489 | } | 3444 | } |
3490 | 3445 | ||
3491 | out_err: | 3446 | out_err: |
@@ -3642,7 +3597,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, | |||
3642 | recon_state.msg_version = 2; | 3597 | recon_state.msg_version = 2; |
3643 | } | 3598 | } |
3644 | /* trsaverse this session's caps */ | 3599 | /* trsaverse this session's caps */ |
3645 | err = iterate_session_caps(session, encode_caps_cb, &recon_state); | 3600 | err = ceph_iterate_session_caps(session, encode_caps_cb, &recon_state); |
3646 | 3601 | ||
3647 | spin_lock(&session->s_cap_lock); | 3602 | spin_lock(&session->s_cap_lock); |
3648 | session->s_cap_reconnect = 0; | 3603 | session->s_cap_reconnect = 0; |
@@ -4125,6 +4080,8 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc) | |||
4125 | mdsc->max_sessions = 0; | 4080 | mdsc->max_sessions = 0; |
4126 | mdsc->stopping = 0; | 4081 | mdsc->stopping = 0; |
4127 | atomic64_set(&mdsc->quotarealms_count, 0); | 4082 | atomic64_set(&mdsc->quotarealms_count, 0); |
4083 | mdsc->quotarealms_inodes = RB_ROOT; | ||
4084 | mutex_init(&mdsc->quotarealms_inodes_mutex); | ||
4128 | mdsc->last_snap_seq = 0; | 4085 | mdsc->last_snap_seq = 0; |
4129 | init_rwsem(&mdsc->snap_rwsem); | 4086 | init_rwsem(&mdsc->snap_rwsem); |
4130 | mdsc->snap_realms = RB_ROOT; | 4087 | mdsc->snap_realms = RB_ROOT; |
@@ -4216,6 +4173,8 @@ void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc) | |||
4216 | * their inode/dcache refs | 4173 | * their inode/dcache refs |
4217 | */ | 4174 | */ |
4218 | ceph_msgr_flush(); | 4175 | ceph_msgr_flush(); |
4176 | |||
4177 | ceph_cleanup_quotarealms_inodes(mdsc); | ||
4219 | } | 4178 | } |
4220 | 4179 | ||
4221 | /* | 4180 | /* |
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 50385a481fdb..a83f28bc2387 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h | |||
@@ -326,6 +326,18 @@ struct ceph_snapid_map { | |||
326 | }; | 326 | }; |
327 | 327 | ||
328 | /* | 328 | /* |
329 | * node for list of quotarealm inodes that are not visible from the filesystem | ||
330 | * mountpoint, but required to handle, e.g. quotas. | ||
331 | */ | ||
332 | struct ceph_quotarealm_inode { | ||
333 | struct rb_node node; | ||
334 | u64 ino; | ||
335 | unsigned long timeout; /* last time a lookup failed for this inode */ | ||
336 | struct mutex mutex; | ||
337 | struct inode *inode; | ||
338 | }; | ||
339 | |||
340 | /* | ||
329 | * mds client state | 341 | * mds client state |
330 | */ | 342 | */ |
331 | struct ceph_mds_client { | 343 | struct ceph_mds_client { |
@@ -344,6 +356,12 @@ struct ceph_mds_client { | |||
344 | int stopping; /* true if shutting down */ | 356 | int stopping; /* true if shutting down */ |
345 | 357 | ||
346 | atomic64_t quotarealms_count; /* # realms with quota */ | 358 | atomic64_t quotarealms_count; /* # realms with quota */ |
359 | /* | ||
360 | * We keep a list of inodes we don't see in the mountpoint but that we | ||
361 | * need to track quota realms. | ||
362 | */ | ||
363 | struct rb_root quotarealms_inodes; | ||
364 | struct mutex quotarealms_inodes_mutex; | ||
347 | 365 | ||
348 | /* | 366 | /* |
349 | * snap_rwsem will cover cap linkage into snaprealms, and | 367 | * snap_rwsem will cover cap linkage into snaprealms, and |
@@ -447,8 +465,9 @@ extern int ceph_alloc_readdir_reply_buffer(struct ceph_mds_request *req, | |||
447 | struct inode *dir); | 465 | struct inode *dir); |
448 | extern struct ceph_mds_request * | 466 | extern struct ceph_mds_request * |
449 | ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode); | 467 | ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode); |
450 | extern void ceph_mdsc_submit_request(struct ceph_mds_client *mdsc, | 468 | extern int ceph_mdsc_submit_request(struct ceph_mds_client *mdsc, |
451 | struct ceph_mds_request *req); | 469 | struct inode *dir, |
470 | struct ceph_mds_request *req); | ||
452 | extern int ceph_mdsc_do_request(struct ceph_mds_client *mdsc, | 471 | extern int ceph_mdsc_do_request(struct ceph_mds_client *mdsc, |
453 | struct inode *dir, | 472 | struct inode *dir, |
454 | struct ceph_mds_request *req); | 473 | struct ceph_mds_request *req); |
@@ -468,8 +487,18 @@ extern void ceph_flush_cap_releases(struct ceph_mds_client *mdsc, | |||
468 | struct ceph_mds_session *session); | 487 | struct ceph_mds_session *session); |
469 | extern void ceph_queue_cap_reclaim_work(struct ceph_mds_client *mdsc); | 488 | extern void ceph_queue_cap_reclaim_work(struct ceph_mds_client *mdsc); |
470 | extern void ceph_reclaim_caps_nr(struct ceph_mds_client *mdsc, int nr); | 489 | extern void ceph_reclaim_caps_nr(struct ceph_mds_client *mdsc, int nr); |
490 | extern int ceph_iterate_session_caps(struct ceph_mds_session *session, | ||
491 | int (*cb)(struct inode *, | ||
492 | struct ceph_cap *, void *), | ||
493 | void *arg); | ||
471 | extern void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc); | 494 | extern void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc); |
472 | 495 | ||
496 | static inline void ceph_mdsc_free_path(char *path, int len) | ||
497 | { | ||
498 | if (path) | ||
499 | __putname(path - (PATH_MAX - 1 - len)); | ||
500 | } | ||
501 | |||
473 | extern char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *base, | 502 | extern char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *base, |
474 | int stop_on_nosnap); | 503 | int stop_on_nosnap); |
475 | 504 | ||
diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c index 1a2c5d390f7f..701b4fb0fb5a 100644 --- a/fs/ceph/mdsmap.c +++ b/fs/ceph/mdsmap.c | |||
@@ -205,7 +205,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) | |||
205 | 205 | ||
206 | dout("mdsmap_decode %d/%d %lld mds%d.%d %s %s\n", | 206 | dout("mdsmap_decode %d/%d %lld mds%d.%d %s %s\n", |
207 | i+1, n, global_id, mds, inc, | 207 | i+1, n, global_id, mds, inc, |
208 | ceph_pr_addr(&addr.in_addr), | 208 | ceph_pr_addr(&addr), |
209 | ceph_mds_state_name(state)); | 209 | ceph_mds_state_name(state)); |
210 | 210 | ||
211 | if (mds < 0 || state <= 0) | 211 | if (mds < 0 || state <= 0) |
diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c index 9455d3aef0c3..c4522212872c 100644 --- a/fs/ceph/quota.c +++ b/fs/ceph/quota.c | |||
@@ -22,7 +22,16 @@ void ceph_adjust_quota_realms_count(struct inode *inode, bool inc) | |||
22 | static inline bool ceph_has_realms_with_quotas(struct inode *inode) | 22 | static inline bool ceph_has_realms_with_quotas(struct inode *inode) |
23 | { | 23 | { |
24 | struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; | 24 | struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; |
25 | return atomic64_read(&mdsc->quotarealms_count) > 0; | 25 | struct super_block *sb = mdsc->fsc->sb; |
26 | |||
27 | if (atomic64_read(&mdsc->quotarealms_count) > 0) | ||
28 | return true; | ||
29 | /* if root is the real CephFS root, we don't have quota realms */ | ||
30 | if (sb->s_root->d_inode && | ||
31 | (sb->s_root->d_inode->i_ino == CEPH_INO_ROOT)) | ||
32 | return false; | ||
33 | /* otherwise, we can't know for sure */ | ||
34 | return true; | ||
26 | } | 35 | } |
27 | 36 | ||
28 | void ceph_handle_quota(struct ceph_mds_client *mdsc, | 37 | void ceph_handle_quota(struct ceph_mds_client *mdsc, |
@@ -68,6 +77,108 @@ void ceph_handle_quota(struct ceph_mds_client *mdsc, | |||
68 | iput(inode); | 77 | iput(inode); |
69 | } | 78 | } |
70 | 79 | ||
80 | static struct ceph_quotarealm_inode * | ||
81 | find_quotarealm_inode(struct ceph_mds_client *mdsc, u64 ino) | ||
82 | { | ||
83 | struct ceph_quotarealm_inode *qri = NULL; | ||
84 | struct rb_node **node, *parent = NULL; | ||
85 | |||
86 | mutex_lock(&mdsc->quotarealms_inodes_mutex); | ||
87 | node = &(mdsc->quotarealms_inodes.rb_node); | ||
88 | while (*node) { | ||
89 | parent = *node; | ||
90 | qri = container_of(*node, struct ceph_quotarealm_inode, node); | ||
91 | |||
92 | if (ino < qri->ino) | ||
93 | node = &((*node)->rb_left); | ||
94 | else if (ino > qri->ino) | ||
95 | node = &((*node)->rb_right); | ||
96 | else | ||
97 | break; | ||
98 | } | ||
99 | if (!qri || (qri->ino != ino)) { | ||
100 | /* Not found, create a new one and insert it */ | ||
101 | qri = kmalloc(sizeof(*qri), GFP_KERNEL); | ||
102 | if (qri) { | ||
103 | qri->ino = ino; | ||
104 | qri->inode = NULL; | ||
105 | qri->timeout = 0; | ||
106 | mutex_init(&qri->mutex); | ||
107 | rb_link_node(&qri->node, parent, node); | ||
108 | rb_insert_color(&qri->node, &mdsc->quotarealms_inodes); | ||
109 | } else | ||
110 | pr_warn("Failed to alloc quotarealms_inode\n"); | ||
111 | } | ||
112 | mutex_unlock(&mdsc->quotarealms_inodes_mutex); | ||
113 | |||
114 | return qri; | ||
115 | } | ||
116 | |||
117 | /* | ||
118 | * This function will try to lookup a realm inode which isn't visible in the | ||
119 | * filesystem mountpoint. A list of these kind of inodes (not visible) is | ||
120 | * maintained in the mdsc and freed only when the filesystem is umounted. | ||
121 | * | ||
122 | * Note that these inodes are kept in this list even if the lookup fails, which | ||
123 | * allows to prevent useless lookup requests. | ||
124 | */ | ||
125 | static struct inode *lookup_quotarealm_inode(struct ceph_mds_client *mdsc, | ||
126 | struct super_block *sb, | ||
127 | struct ceph_snap_realm *realm) | ||
128 | { | ||
129 | struct ceph_quotarealm_inode *qri; | ||
130 | struct inode *in; | ||
131 | |||
132 | qri = find_quotarealm_inode(mdsc, realm->ino); | ||
133 | if (!qri) | ||
134 | return NULL; | ||
135 | |||
136 | mutex_lock(&qri->mutex); | ||
137 | if (qri->inode) { | ||
138 | /* A request has already returned the inode */ | ||
139 | mutex_unlock(&qri->mutex); | ||
140 | return qri->inode; | ||
141 | } | ||
142 | /* Check if this inode lookup has failed recently */ | ||
143 | if (qri->timeout && | ||
144 | time_before_eq(jiffies, qri->timeout)) { | ||
145 | mutex_unlock(&qri->mutex); | ||
146 | return NULL; | ||
147 | } | ||
148 | in = ceph_lookup_inode(sb, realm->ino); | ||
149 | if (IS_ERR(in)) { | ||
150 | pr_warn("Can't lookup inode %llx (err: %ld)\n", | ||
151 | realm->ino, PTR_ERR(in)); | ||
152 | qri->timeout = jiffies + msecs_to_jiffies(60 * 1000); /* XXX */ | ||
153 | } else { | ||
154 | qri->timeout = 0; | ||
155 | qri->inode = in; | ||
156 | } | ||
157 | mutex_unlock(&qri->mutex); | ||
158 | |||
159 | return in; | ||
160 | } | ||
161 | |||
162 | void ceph_cleanup_quotarealms_inodes(struct ceph_mds_client *mdsc) | ||
163 | { | ||
164 | struct ceph_quotarealm_inode *qri; | ||
165 | struct rb_node *node; | ||
166 | |||
167 | /* | ||
168 | * It should now be safe to clean quotarealms_inode tree without holding | ||
169 | * mdsc->quotarealms_inodes_mutex... | ||
170 | */ | ||
171 | mutex_lock(&mdsc->quotarealms_inodes_mutex); | ||
172 | while (!RB_EMPTY_ROOT(&mdsc->quotarealms_inodes)) { | ||
173 | node = rb_first(&mdsc->quotarealms_inodes); | ||
174 | qri = rb_entry(node, struct ceph_quotarealm_inode, node); | ||
175 | rb_erase(node, &mdsc->quotarealms_inodes); | ||
176 | iput(qri->inode); | ||
177 | kfree(qri); | ||
178 | } | ||
179 | mutex_unlock(&mdsc->quotarealms_inodes_mutex); | ||
180 | } | ||
181 | |||
71 | /* | 182 | /* |
72 | * This function walks through the snaprealm for an inode and returns the | 183 | * This function walks through the snaprealm for an inode and returns the |
73 | * ceph_snap_realm for the first snaprealm that has quotas set (either max_files | 184 | * ceph_snap_realm for the first snaprealm that has quotas set (either max_files |
@@ -76,9 +187,15 @@ void ceph_handle_quota(struct ceph_mds_client *mdsc, | |||
76 | * | 187 | * |
77 | * Note that the caller is responsible for calling ceph_put_snap_realm() on the | 188 | * Note that the caller is responsible for calling ceph_put_snap_realm() on the |
78 | * returned realm. | 189 | * returned realm. |
190 | * | ||
191 | * Callers of this function need to hold mdsc->snap_rwsem. However, if there's | ||
192 | * a need to do an inode lookup, this rwsem will be temporarily dropped. Hence | ||
193 | * the 'retry' argument: if rwsem needs to be dropped and 'retry' is 'false' | ||
194 | * this function will return -EAGAIN; otherwise, the snaprealms walk-through | ||
195 | * will be restarted. | ||
79 | */ | 196 | */ |
80 | static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc, | 197 | static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc, |
81 | struct inode *inode) | 198 | struct inode *inode, bool retry) |
82 | { | 199 | { |
83 | struct ceph_inode_info *ci = NULL; | 200 | struct ceph_inode_info *ci = NULL; |
84 | struct ceph_snap_realm *realm, *next; | 201 | struct ceph_snap_realm *realm, *next; |
@@ -88,6 +205,7 @@ static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc, | |||
88 | if (ceph_snap(inode) != CEPH_NOSNAP) | 205 | if (ceph_snap(inode) != CEPH_NOSNAP) |
89 | return NULL; | 206 | return NULL; |
90 | 207 | ||
208 | restart: | ||
91 | realm = ceph_inode(inode)->i_snap_realm; | 209 | realm = ceph_inode(inode)->i_snap_realm; |
92 | if (realm) | 210 | if (realm) |
93 | ceph_get_snap_realm(mdsc, realm); | 211 | ceph_get_snap_realm(mdsc, realm); |
@@ -95,11 +213,25 @@ static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc, | |||
95 | pr_err_ratelimited("get_quota_realm: ino (%llx.%llx) " | 213 | pr_err_ratelimited("get_quota_realm: ino (%llx.%llx) " |
96 | "null i_snap_realm\n", ceph_vinop(inode)); | 214 | "null i_snap_realm\n", ceph_vinop(inode)); |
97 | while (realm) { | 215 | while (realm) { |
216 | bool has_inode; | ||
217 | |||
98 | spin_lock(&realm->inodes_with_caps_lock); | 218 | spin_lock(&realm->inodes_with_caps_lock); |
99 | in = realm->inode ? igrab(realm->inode) : NULL; | 219 | has_inode = realm->inode; |
220 | in = has_inode ? igrab(realm->inode) : NULL; | ||
100 | spin_unlock(&realm->inodes_with_caps_lock); | 221 | spin_unlock(&realm->inodes_with_caps_lock); |
101 | if (!in) | 222 | if (has_inode && !in) |
102 | break; | 223 | break; |
224 | if (!in) { | ||
225 | up_read(&mdsc->snap_rwsem); | ||
226 | in = lookup_quotarealm_inode(mdsc, inode->i_sb, realm); | ||
227 | down_read(&mdsc->snap_rwsem); | ||
228 | if (IS_ERR_OR_NULL(in)) | ||
229 | break; | ||
230 | ceph_put_snap_realm(mdsc, realm); | ||
231 | if (!retry) | ||
232 | return ERR_PTR(-EAGAIN); | ||
233 | goto restart; | ||
234 | } | ||
103 | 235 | ||
104 | ci = ceph_inode(in); | 236 | ci = ceph_inode(in); |
105 | has_quota = __ceph_has_any_quota(ci); | 237 | has_quota = __ceph_has_any_quota(ci); |
@@ -125,9 +257,22 @@ bool ceph_quota_is_same_realm(struct inode *old, struct inode *new) | |||
125 | struct ceph_snap_realm *old_realm, *new_realm; | 257 | struct ceph_snap_realm *old_realm, *new_realm; |
126 | bool is_same; | 258 | bool is_same; |
127 | 259 | ||
260 | restart: | ||
261 | /* | ||
262 | * We need to lookup 2 quota realms atomically, i.e. with snap_rwsem. | ||
263 | * However, get_quota_realm may drop it temporarily. By setting the | ||
264 | * 'retry' parameter to 'false', we'll get -EAGAIN if the rwsem was | ||
265 | * dropped and we can then restart the whole operation. | ||
266 | */ | ||
128 | down_read(&mdsc->snap_rwsem); | 267 | down_read(&mdsc->snap_rwsem); |
129 | old_realm = get_quota_realm(mdsc, old); | 268 | old_realm = get_quota_realm(mdsc, old, true); |
130 | new_realm = get_quota_realm(mdsc, new); | 269 | new_realm = get_quota_realm(mdsc, new, false); |
270 | if (PTR_ERR(new_realm) == -EAGAIN) { | ||
271 | up_read(&mdsc->snap_rwsem); | ||
272 | if (old_realm) | ||
273 | ceph_put_snap_realm(mdsc, old_realm); | ||
274 | goto restart; | ||
275 | } | ||
131 | is_same = (old_realm == new_realm); | 276 | is_same = (old_realm == new_realm); |
132 | up_read(&mdsc->snap_rwsem); | 277 | up_read(&mdsc->snap_rwsem); |
133 | 278 | ||
@@ -166,6 +311,7 @@ static bool check_quota_exceeded(struct inode *inode, enum quota_check_op op, | |||
166 | return false; | 311 | return false; |
167 | 312 | ||
168 | down_read(&mdsc->snap_rwsem); | 313 | down_read(&mdsc->snap_rwsem); |
314 | restart: | ||
169 | realm = ceph_inode(inode)->i_snap_realm; | 315 | realm = ceph_inode(inode)->i_snap_realm; |
170 | if (realm) | 316 | if (realm) |
171 | ceph_get_snap_realm(mdsc, realm); | 317 | ceph_get_snap_realm(mdsc, realm); |
@@ -173,12 +319,23 @@ static bool check_quota_exceeded(struct inode *inode, enum quota_check_op op, | |||
173 | pr_err_ratelimited("check_quota_exceeded: ino (%llx.%llx) " | 319 | pr_err_ratelimited("check_quota_exceeded: ino (%llx.%llx) " |
174 | "null i_snap_realm\n", ceph_vinop(inode)); | 320 | "null i_snap_realm\n", ceph_vinop(inode)); |
175 | while (realm) { | 321 | while (realm) { |
322 | bool has_inode; | ||
323 | |||
176 | spin_lock(&realm->inodes_with_caps_lock); | 324 | spin_lock(&realm->inodes_with_caps_lock); |
177 | in = realm->inode ? igrab(realm->inode) : NULL; | 325 | has_inode = realm->inode; |
326 | in = has_inode ? igrab(realm->inode) : NULL; | ||
178 | spin_unlock(&realm->inodes_with_caps_lock); | 327 | spin_unlock(&realm->inodes_with_caps_lock); |
179 | if (!in) | 328 | if (has_inode && !in) |
180 | break; | 329 | break; |
181 | 330 | if (!in) { | |
331 | up_read(&mdsc->snap_rwsem); | ||
332 | in = lookup_quotarealm_inode(mdsc, inode->i_sb, realm); | ||
333 | down_read(&mdsc->snap_rwsem); | ||
334 | if (IS_ERR_OR_NULL(in)) | ||
335 | break; | ||
336 | ceph_put_snap_realm(mdsc, realm); | ||
337 | goto restart; | ||
338 | } | ||
182 | ci = ceph_inode(in); | 339 | ci = ceph_inode(in); |
183 | spin_lock(&ci->i_ceph_lock); | 340 | spin_lock(&ci->i_ceph_lock); |
184 | if (op == QUOTA_CHECK_MAX_FILES_OP) { | 341 | if (op == QUOTA_CHECK_MAX_FILES_OP) { |
@@ -314,7 +471,7 @@ bool ceph_quota_update_statfs(struct ceph_fs_client *fsc, struct kstatfs *buf) | |||
314 | bool is_updated = false; | 471 | bool is_updated = false; |
315 | 472 | ||
316 | down_read(&mdsc->snap_rwsem); | 473 | down_read(&mdsc->snap_rwsem); |
317 | realm = get_quota_realm(mdsc, d_inode(fsc->sb->s_root)); | 474 | realm = get_quota_realm(mdsc, d_inode(fsc->sb->s_root), true); |
318 | up_read(&mdsc->snap_rwsem); | 475 | up_read(&mdsc->snap_rwsem); |
319 | if (!realm) | 476 | if (!realm) |
320 | return false; | 477 | return false; |
diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 285edda4fc3b..c864b44c8341 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c | |||
@@ -845,6 +845,12 @@ static void ceph_umount_begin(struct super_block *sb) | |||
845 | return; | 845 | return; |
846 | } | 846 | } |
847 | 847 | ||
848 | static int ceph_remount(struct super_block *sb, int *flags, char *data) | ||
849 | { | ||
850 | sync_filesystem(sb); | ||
851 | return 0; | ||
852 | } | ||
853 | |||
848 | static const struct super_operations ceph_super_ops = { | 854 | static const struct super_operations ceph_super_ops = { |
849 | .alloc_inode = ceph_alloc_inode, | 855 | .alloc_inode = ceph_alloc_inode, |
850 | .destroy_inode = ceph_destroy_inode, | 856 | .destroy_inode = ceph_destroy_inode, |
@@ -853,6 +859,7 @@ static const struct super_operations ceph_super_ops = { | |||
853 | .drop_inode = ceph_drop_inode, | 859 | .drop_inode = ceph_drop_inode, |
854 | .sync_fs = ceph_sync_fs, | 860 | .sync_fs = ceph_sync_fs, |
855 | .put_super = ceph_put_super, | 861 | .put_super = ceph_put_super, |
862 | .remount_fs = ceph_remount, | ||
856 | .show_options = ceph_show_options, | 863 | .show_options = ceph_show_options, |
857 | .statfs = ceph_statfs, | 864 | .statfs = ceph_statfs, |
858 | .umount_begin = ceph_umount_begin, | 865 | .umount_begin = ceph_umount_begin, |
diff --git a/fs/ceph/super.h b/fs/ceph/super.h index c5b4a05905c0..6edab9a750f8 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h | |||
@@ -1083,6 +1083,7 @@ extern long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg); | |||
1083 | 1083 | ||
1084 | /* export.c */ | 1084 | /* export.c */ |
1085 | extern const struct export_operations ceph_export_ops; | 1085 | extern const struct export_operations ceph_export_ops; |
1086 | struct inode *ceph_lookup_inode(struct super_block *sb, u64 ino); | ||
1086 | 1087 | ||
1087 | /* locks.c */ | 1088 | /* locks.c */ |
1088 | extern __init void ceph_flock_init(void); | 1089 | extern __init void ceph_flock_init(void); |
@@ -1133,5 +1134,6 @@ extern bool ceph_quota_is_max_bytes_approaching(struct inode *inode, | |||
1133 | loff_t newlen); | 1134 | loff_t newlen); |
1134 | extern bool ceph_quota_update_statfs(struct ceph_fs_client *fsc, | 1135 | extern bool ceph_quota_update_statfs(struct ceph_fs_client *fsc, |
1135 | struct kstatfs *buf); | 1136 | struct kstatfs *buf); |
1137 | extern void ceph_cleanup_quotarealms_inodes(struct ceph_mds_client *mdsc); | ||
1136 | 1138 | ||
1137 | #endif /* _FS_CEPH_SUPER_H */ | 1139 | #endif /* _FS_CEPH_SUPER_H */ |