about summary refs log tree commit diff stats
path: root/fs/ceph/caps.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ceph/caps.c')
-rw-r--r-- fs/ceph/caps.c 116
1 files changed, 72 insertions, 44 deletions
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index db122bb357b8..aa2239fa9a3b 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -3,6 +3,7 @@
3#include <linux/fs.h> 3#include <linux/fs.h>
4#include <linux/kernel.h> 4#include <linux/kernel.h>
5#include <linux/sched.h> 5#include <linux/sched.h>
6#include <linux/slab.h>
6#include <linux/vmalloc.h> 7#include <linux/vmalloc.h>
7#include <linux/wait.h> 8#include <linux/wait.h>
8#include <linux/writeback.h> 9#include <linux/writeback.h>
@@ -1204,6 +1205,12 @@ retry:
1204 if (capsnap->dirty_pages || capsnap->writing) 1205 if (capsnap->dirty_pages || capsnap->writing)
1205 continue; 1206 continue;
1206 1207
1208 /*
1209 * if cap writeback already occurred, we should have dropped
1210 * the capsnap in ceph_put_wrbuffer_cap_refs.
1211 */
1212 BUG_ON(capsnap->dirty == 0);
1213
1207 /* pick mds, take s_mutex */ 1214 /* pick mds, take s_mutex */
1208 mds = __ceph_get_cap_mds(ci, &mseq); 1215 mds = __ceph_get_cap_mds(ci, &mseq);
1209 if (session && session->s_mds != mds) { 1216 if (session && session->s_mds != mds) {
@@ -1407,6 +1414,7 @@ static int try_nonblocking_invalidate(struct inode *inode)
1407 */ 1414 */
1408void ceph_check_caps(struct ceph_inode_info *ci, int flags, 1415void ceph_check_caps(struct ceph_inode_info *ci, int flags,
1409 struct ceph_mds_session *session) 1416 struct ceph_mds_session *session)
1417 __releases(session->s_mutex)
1410{ 1418{
1411 struct ceph_client *client = ceph_inode_to_client(&ci->vfs_inode); 1419 struct ceph_client *client = ceph_inode_to_client(&ci->vfs_inode);
1412 struct ceph_mds_client *mdsc = &client->mdsc; 1420 struct ceph_mds_client *mdsc = &client->mdsc;
@@ -1414,7 +1422,6 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
1414 struct ceph_cap *cap; 1422 struct ceph_cap *cap;
1415 int file_wanted, used; 1423 int file_wanted, used;
1416 int took_snap_rwsem = 0; /* true if mdsc->snap_rwsem held */ 1424 int took_snap_rwsem = 0; /* true if mdsc->snap_rwsem held */
1417 int drop_session_lock = session ? 0 : 1;
1418 int issued, implemented, want, retain, revoking, flushing = 0; 1425 int issued, implemented, want, retain, revoking, flushing = 0;
1419 int mds = -1; /* keep track of how far we've gone through i_caps list 1426 int mds = -1; /* keep track of how far we've gone through i_caps list
1420 to avoid an infinite loop on retry */ 1427 to avoid an infinite loop on retry */
@@ -1639,7 +1646,7 @@ ack:
1639 if (queue_invalidate) 1646 if (queue_invalidate)
1640 ceph_queue_invalidate(inode); 1647 ceph_queue_invalidate(inode);
1641 1648
1642 if (session && drop_session_lock) 1649 if (session)
1643 mutex_unlock(&session->s_mutex); 1650 mutex_unlock(&session->s_mutex);
1644 if (took_snap_rwsem) 1651 if (took_snap_rwsem)
1645 up_read(&mdsc->snap_rwsem); 1652 up_read(&mdsc->snap_rwsem);
@@ -2117,8 +2124,8 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
2117 } 2124 }
2118 spin_unlock(&inode->i_lock); 2125 spin_unlock(&inode->i_lock);
2119 2126
2120 dout("put_cap_refs %p had %s %s\n", inode, ceph_cap_string(had), 2127 dout("put_cap_refs %p had %s%s%s\n", inode, ceph_cap_string(had),
2121 last ? "last" : ""); 2128 last ? " last" : "", put ? " put" : "");
2122 2129
2123 if (last && !flushsnaps) 2130 if (last && !flushsnaps)
2124 ceph_check_caps(ci, 0, NULL); 2131 ceph_check_caps(ci, 0, NULL);
@@ -2142,7 +2149,8 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
2142{ 2149{
2143 struct inode *inode = &ci->vfs_inode; 2150 struct inode *inode = &ci->vfs_inode;
2144 int last = 0; 2151 int last = 0;
2145 int last_snap = 0; 2152 int complete_capsnap = 0;
2153 int drop_capsnap = 0;
2146 int found = 0; 2154 int found = 0;
2147 struct ceph_cap_snap *capsnap = NULL; 2155 struct ceph_cap_snap *capsnap = NULL;
2148 2156
@@ -2165,19 +2173,32 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
2165 list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) { 2173 list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
2166 if (capsnap->context == snapc) { 2174 if (capsnap->context == snapc) {
2167 found = 1; 2175 found = 1;
2168 capsnap->dirty_pages -= nr;
2169 last_snap = !capsnap->dirty_pages;
2170 break; 2176 break;
2171 } 2177 }
2172 } 2178 }
2173 BUG_ON(!found); 2179 BUG_ON(!found);
2180 capsnap->dirty_pages -= nr;
2181 if (capsnap->dirty_pages == 0) {
2182 complete_capsnap = 1;
2183 if (capsnap->dirty == 0)
2184 /* cap writeback completed before we created
2185 * the cap_snap; no FLUSHSNAP is needed */
2186 drop_capsnap = 1;
2187 }
2174 dout("put_wrbuffer_cap_refs on %p cap_snap %p " 2188 dout("put_wrbuffer_cap_refs on %p cap_snap %p "
2175 " snap %lld %d/%d -> %d/%d %s%s\n", 2189 " snap %lld %d/%d -> %d/%d %s%s%s\n",
2176 inode, capsnap, capsnap->context->seq, 2190 inode, capsnap, capsnap->context->seq,
2177 ci->i_wrbuffer_ref+nr, capsnap->dirty_pages + nr, 2191 ci->i_wrbuffer_ref+nr, capsnap->dirty_pages + nr,
2178 ci->i_wrbuffer_ref, capsnap->dirty_pages, 2192 ci->i_wrbuffer_ref, capsnap->dirty_pages,
2179 last ? " (wrbuffer last)" : "", 2193 last ? " (wrbuffer last)" : "",
2180 last_snap ? " (capsnap last)" : ""); 2194 complete_capsnap ? " (complete capsnap)" : "",
2195 drop_capsnap ? " (drop capsnap)" : "");
2196 if (drop_capsnap) {
2197 ceph_put_snap_context(capsnap->context);
2198 list_del(&capsnap->ci_item);
2199 list_del(&capsnap->flushing_item);
2200 ceph_put_cap_snap(capsnap);
2201 }
2181 } 2202 }
2182 2203
2183 spin_unlock(&inode->i_lock); 2204 spin_unlock(&inode->i_lock);
@@ -2185,28 +2206,31 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
2185 if (last) { 2206 if (last) {
2186 ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL); 2207 ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
2187 iput(inode); 2208 iput(inode);
2188 } else if (last_snap) { 2209 } else if (complete_capsnap) {
2189 ceph_flush_snaps(ci); 2210 ceph_flush_snaps(ci);
2190 wake_up(&ci->i_cap_wq); 2211 wake_up(&ci->i_cap_wq);
2191 } 2212 }
2213 if (drop_capsnap)
2214 iput(inode);
2192} 2215}
2193 2216
2194/* 2217/*
2195 * Handle a cap GRANT message from the MDS. (Note that a GRANT may 2218 * Handle a cap GRANT message from the MDS. (Note that a GRANT may
2196 * actually be a revocation if it specifies a smaller cap set.) 2219 * actually be a revocation if it specifies a smaller cap set.)
2197 * 2220 *
2198 * caller holds s_mutex. 2221 * caller holds s_mutex and i_lock, we drop both.
2222 *
2199 * return value: 2223 * return value:
2200 * 0 - ok 2224 * 0 - ok
2201 * 1 - check_caps on auth cap only (writeback) 2225 * 1 - check_caps on auth cap only (writeback)
2202 * 2 - check_caps (ack revoke) 2226 * 2 - check_caps (ack revoke)
2203 */ 2227 */
2204static int handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, 2228static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
2205 struct ceph_mds_session *session, 2229 struct ceph_mds_session *session,
2206 struct ceph_cap *cap, 2230 struct ceph_cap *cap,
2207 struct ceph_buffer *xattr_buf) 2231 struct ceph_buffer *xattr_buf)
2208 __releases(inode->i_lock) 2232 __releases(inode->i_lock)
2209 2233 __releases(session->s_mutex)
2210{ 2234{
2211 struct ceph_inode_info *ci = ceph_inode(inode); 2235 struct ceph_inode_info *ci = ceph_inode(inode);
2212 int mds = session->s_mds; 2236 int mds = session->s_mds;
@@ -2216,7 +2240,7 @@ static int handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
2216 u64 size = le64_to_cpu(grant->size); 2240 u64 size = le64_to_cpu(grant->size);
2217 u64 max_size = le64_to_cpu(grant->max_size); 2241 u64 max_size = le64_to_cpu(grant->max_size);
2218 struct timespec mtime, atime, ctime; 2242 struct timespec mtime, atime, ctime;
2219 int reply = 0; 2243 int check_caps = 0;
2220 int wake = 0; 2244 int wake = 0;
2221 int writeback = 0; 2245 int writeback = 0;
2222 int revoked_rdcache = 0; 2246 int revoked_rdcache = 0;
@@ -2329,11 +2353,12 @@ static int handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
2329 if ((used & ~newcaps) & CEPH_CAP_FILE_BUFFER) 2353 if ((used & ~newcaps) & CEPH_CAP_FILE_BUFFER)
2330 writeback = 1; /* will delay ack */ 2354 writeback = 1; /* will delay ack */
2331 else if (dirty & ~newcaps) 2355 else if (dirty & ~newcaps)
2332 reply = 1; /* initiate writeback in check_caps */ 2356 check_caps = 1; /* initiate writeback in check_caps */
2333 else if (((used & ~newcaps) & CEPH_CAP_FILE_CACHE) == 0 || 2357 else if (((used & ~newcaps) & CEPH_CAP_FILE_CACHE) == 0 ||
2334 revoked_rdcache) 2358 revoked_rdcache)
2335 reply = 2; /* send revoke ack in check_caps */ 2359 check_caps = 2; /* send revoke ack in check_caps */
2336 cap->issued = newcaps; 2360 cap->issued = newcaps;
2361 cap->implemented |= newcaps;
2337 } else if (cap->issued == newcaps) { 2362 } else if (cap->issued == newcaps) {
2338 dout("caps unchanged: %s -> %s\n", 2363 dout("caps unchanged: %s -> %s\n",
2339 ceph_cap_string(cap->issued), ceph_cap_string(newcaps)); 2364 ceph_cap_string(cap->issued), ceph_cap_string(newcaps));
@@ -2346,6 +2371,7 @@ static int handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
2346 * pending revocation */ 2371 * pending revocation */
2347 wake = 1; 2372 wake = 1;
2348 } 2373 }
2374 BUG_ON(cap->issued & ~cap->implemented);
2349 2375
2350 spin_unlock(&inode->i_lock); 2376 spin_unlock(&inode->i_lock);
2351 if (writeback) 2377 if (writeback)
@@ -2359,7 +2385,14 @@ static int handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
2359 ceph_queue_invalidate(inode); 2385 ceph_queue_invalidate(inode);
2360 if (wake) 2386 if (wake)
2361 wake_up(&ci->i_cap_wq); 2387 wake_up(&ci->i_cap_wq);
2362 return reply; 2388
2389 if (check_caps == 1)
2390 ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_AUTHONLY,
2391 session);
2392 else if (check_caps == 2)
2393 ceph_check_caps(ci, CHECK_CAPS_NODELAY, session);
2394 else
2395 mutex_unlock(&session->s_mutex);
2363} 2396}
2364 2397
2365/* 2398/*
@@ -2454,8 +2487,8 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
2454 break; 2487 break;
2455 } 2488 }
2456 WARN_ON(capsnap->dirty_pages || capsnap->writing); 2489 WARN_ON(capsnap->dirty_pages || capsnap->writing);
2457 dout(" removing cap_snap %p follows %lld\n", 2490 dout(" removing %p cap_snap %p follows %lld\n",
2458 capsnap, follows); 2491 inode, capsnap, follows);
2459 ceph_put_snap_context(capsnap->context); 2492 ceph_put_snap_context(capsnap->context);
2460 list_del(&capsnap->ci_item); 2493 list_del(&capsnap->ci_item);
2461 list_del(&capsnap->flushing_item); 2494 list_del(&capsnap->flushing_item);
@@ -2548,9 +2581,8 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
2548 ci->i_cap_exporting_issued = cap->issued; 2581 ci->i_cap_exporting_issued = cap->issued;
2549 } 2582 }
2550 __ceph_remove_cap(cap); 2583 __ceph_remove_cap(cap);
2551 } else {
2552 WARN_ON(!cap);
2553 } 2584 }
2585 /* else, we already released it */
2554 2586
2555 spin_unlock(&inode->i_lock); 2587 spin_unlock(&inode->i_lock);
2556} 2588}
@@ -2621,9 +2653,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
2621 u64 cap_id; 2653 u64 cap_id;
2622 u64 size, max_size; 2654 u64 size, max_size;
2623 u64 tid; 2655 u64 tid;
2624 int check_caps = 0;
2625 void *snaptrace; 2656 void *snaptrace;
2626 int r;
2627 2657
2628 dout("handle_caps from mds%d\n", mds); 2658 dout("handle_caps from mds%d\n", mds);
2629 2659
@@ -2668,8 +2698,9 @@ void ceph_handle_caps(struct ceph_mds_session *session,
2668 case CEPH_CAP_OP_IMPORT: 2698 case CEPH_CAP_OP_IMPORT:
2669 handle_cap_import(mdsc, inode, h, session, 2699 handle_cap_import(mdsc, inode, h, session,
2670 snaptrace, le32_to_cpu(h->snap_trace_len)); 2700 snaptrace, le32_to_cpu(h->snap_trace_len));
2671 check_caps = 1; /* we may have sent a RELEASE to the old auth */ 2701 ceph_check_caps(ceph_inode(inode), CHECK_CAPS_NODELAY,
2672 goto done; 2702 session);
2703 goto done_unlocked;
2673 } 2704 }
2674 2705
2675 /* the rest require a cap */ 2706 /* the rest require a cap */
@@ -2686,16 +2717,8 @@ void ceph_handle_caps(struct ceph_mds_session *session,
2686 switch (op) { 2717 switch (op) {
2687 case CEPH_CAP_OP_REVOKE: 2718 case CEPH_CAP_OP_REVOKE:
2688 case CEPH_CAP_OP_GRANT: 2719 case CEPH_CAP_OP_GRANT:
2689 r = handle_cap_grant(inode, h, session, cap, msg->middle); 2720 handle_cap_grant(inode, h, session, cap, msg->middle);
2690 if (r == 1) 2721 goto done_unlocked;
2691 ceph_check_caps(ceph_inode(inode),
2692 CHECK_CAPS_NODELAY|CHECK_CAPS_AUTHONLY,
2693 session);
2694 else if (r == 2)
2695 ceph_check_caps(ceph_inode(inode),
2696 CHECK_CAPS_NODELAY,
2697 session);
2698 break;
2699 2722
2700 case CEPH_CAP_OP_FLUSH_ACK: 2723 case CEPH_CAP_OP_FLUSH_ACK:
2701 handle_cap_flush_ack(inode, tid, h, session, cap); 2724 handle_cap_flush_ack(inode, tid, h, session, cap);
@@ -2713,9 +2736,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
2713 2736
2714done: 2737done:
2715 mutex_unlock(&session->s_mutex); 2738 mutex_unlock(&session->s_mutex);
2716 2739done_unlocked:
2717 if (check_caps)
2718 ceph_check_caps(ceph_inode(inode), CHECK_CAPS_NODELAY, NULL);
2719 if (inode) 2740 if (inode)
2720 iput(inode); 2741 iput(inode);
2721 return; 2742 return;
@@ -2838,11 +2859,18 @@ int ceph_encode_inode_release(void **p, struct inode *inode,
2838 struct ceph_cap *cap; 2859 struct ceph_cap *cap;
2839 struct ceph_mds_request_release *rel = *p; 2860 struct ceph_mds_request_release *rel = *p;
2840 int ret = 0; 2861 int ret = 0;
2841 2862 int used = 0;
2842 dout("encode_inode_release %p mds%d drop %s unless %s\n", inode,
2843 mds, ceph_cap_string(drop), ceph_cap_string(unless));
2844 2863
2845 spin_lock(&inode->i_lock); 2864 spin_lock(&inode->i_lock);
2865 used = __ceph_caps_used(ci);
2866
2867 dout("encode_inode_release %p mds%d used %s drop %s unless %s\n", inode,
2868 mds, ceph_cap_string(used), ceph_cap_string(drop),
2869 ceph_cap_string(unless));
2870
2871 /* only drop unused caps */
2872 drop &= ~used;
2873
2846 cap = __get_cap_for_mds(ci, mds); 2874 cap = __get_cap_for_mds(ci, mds);
2847 if (cap && __cap_is_valid(cap)) { 2875 if (cap && __cap_is_valid(cap)) {
2848 if (force || 2876 if (force ||