diff options
Diffstat (limited to 'fs/ceph/caps.c')
-rw-r--r-- | fs/ceph/caps.c | 116 |
1 files changed, 72 insertions, 44 deletions
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index db122bb357b8..aa2239fa9a3b 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
@@ -3,6 +3,7 @@ | |||
3 | #include <linux/fs.h> | 3 | #include <linux/fs.h> |
4 | #include <linux/kernel.h> | 4 | #include <linux/kernel.h> |
5 | #include <linux/sched.h> | 5 | #include <linux/sched.h> |
6 | #include <linux/slab.h> | ||
6 | #include <linux/vmalloc.h> | 7 | #include <linux/vmalloc.h> |
7 | #include <linux/wait.h> | 8 | #include <linux/wait.h> |
8 | #include <linux/writeback.h> | 9 | #include <linux/writeback.h> |
@@ -1204,6 +1205,12 @@ retry: | |||
1204 | if (capsnap->dirty_pages || capsnap->writing) | 1205 | if (capsnap->dirty_pages || capsnap->writing) |
1205 | continue; | 1206 | continue; |
1206 | 1207 | ||
1208 | /* | ||
1209 | * if cap writeback already occurred, we should have dropped | ||
1210 | * the capsnap in ceph_put_wrbuffer_cap_refs. | ||
1211 | */ | ||
1212 | BUG_ON(capsnap->dirty == 0); | ||
1213 | |||
1207 | /* pick mds, take s_mutex */ | 1214 | /* pick mds, take s_mutex */ |
1208 | mds = __ceph_get_cap_mds(ci, &mseq); | 1215 | mds = __ceph_get_cap_mds(ci, &mseq); |
1209 | if (session && session->s_mds != mds) { | 1216 | if (session && session->s_mds != mds) { |
@@ -1407,6 +1414,7 @@ static int try_nonblocking_invalidate(struct inode *inode) | |||
1407 | */ | 1414 | */ |
1408 | void ceph_check_caps(struct ceph_inode_info *ci, int flags, | 1415 | void ceph_check_caps(struct ceph_inode_info *ci, int flags, |
1409 | struct ceph_mds_session *session) | 1416 | struct ceph_mds_session *session) |
1417 | __releases(session->s_mutex) | ||
1410 | { | 1418 | { |
1411 | struct ceph_client *client = ceph_inode_to_client(&ci->vfs_inode); | 1419 | struct ceph_client *client = ceph_inode_to_client(&ci->vfs_inode); |
1412 | struct ceph_mds_client *mdsc = &client->mdsc; | 1420 | struct ceph_mds_client *mdsc = &client->mdsc; |
@@ -1414,7 +1422,6 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags, | |||
1414 | struct ceph_cap *cap; | 1422 | struct ceph_cap *cap; |
1415 | int file_wanted, used; | 1423 | int file_wanted, used; |
1416 | int took_snap_rwsem = 0; /* true if mdsc->snap_rwsem held */ | 1424 | int took_snap_rwsem = 0; /* true if mdsc->snap_rwsem held */ |
1417 | int drop_session_lock = session ? 0 : 1; | ||
1418 | int issued, implemented, want, retain, revoking, flushing = 0; | 1425 | int issued, implemented, want, retain, revoking, flushing = 0; |
1419 | int mds = -1; /* keep track of how far we've gone through i_caps list | 1426 | int mds = -1; /* keep track of how far we've gone through i_caps list |
1420 | to avoid an infinite loop on retry */ | 1427 | to avoid an infinite loop on retry */ |
@@ -1639,7 +1646,7 @@ ack: | |||
1639 | if (queue_invalidate) | 1646 | if (queue_invalidate) |
1640 | ceph_queue_invalidate(inode); | 1647 | ceph_queue_invalidate(inode); |
1641 | 1648 | ||
1642 | if (session && drop_session_lock) | 1649 | if (session) |
1643 | mutex_unlock(&session->s_mutex); | 1650 | mutex_unlock(&session->s_mutex); |
1644 | if (took_snap_rwsem) | 1651 | if (took_snap_rwsem) |
1645 | up_read(&mdsc->snap_rwsem); | 1652 | up_read(&mdsc->snap_rwsem); |
@@ -2117,8 +2124,8 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had) | |||
2117 | } | 2124 | } |
2118 | spin_unlock(&inode->i_lock); | 2125 | spin_unlock(&inode->i_lock); |
2119 | 2126 | ||
2120 | dout("put_cap_refs %p had %s %s\n", inode, ceph_cap_string(had), | 2127 | dout("put_cap_refs %p had %s%s%s\n", inode, ceph_cap_string(had), |
2121 | last ? "last" : ""); | 2128 | last ? " last" : "", put ? " put" : ""); |
2122 | 2129 | ||
2123 | if (last && !flushsnaps) | 2130 | if (last && !flushsnaps) |
2124 | ceph_check_caps(ci, 0, NULL); | 2131 | ceph_check_caps(ci, 0, NULL); |
@@ -2142,7 +2149,8 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, | |||
2142 | { | 2149 | { |
2143 | struct inode *inode = &ci->vfs_inode; | 2150 | struct inode *inode = &ci->vfs_inode; |
2144 | int last = 0; | 2151 | int last = 0; |
2145 | int last_snap = 0; | 2152 | int complete_capsnap = 0; |
2153 | int drop_capsnap = 0; | ||
2146 | int found = 0; | 2154 | int found = 0; |
2147 | struct ceph_cap_snap *capsnap = NULL; | 2155 | struct ceph_cap_snap *capsnap = NULL; |
2148 | 2156 | ||
@@ -2165,19 +2173,32 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, | |||
2165 | list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) { | 2173 | list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) { |
2166 | if (capsnap->context == snapc) { | 2174 | if (capsnap->context == snapc) { |
2167 | found = 1; | 2175 | found = 1; |
2168 | capsnap->dirty_pages -= nr; | ||
2169 | last_snap = !capsnap->dirty_pages; | ||
2170 | break; | 2176 | break; |
2171 | } | 2177 | } |
2172 | } | 2178 | } |
2173 | BUG_ON(!found); | 2179 | BUG_ON(!found); |
2180 | capsnap->dirty_pages -= nr; | ||
2181 | if (capsnap->dirty_pages == 0) { | ||
2182 | complete_capsnap = 1; | ||
2183 | if (capsnap->dirty == 0) | ||
2184 | /* cap writeback completed before we created | ||
2185 | * the cap_snap; no FLUSHSNAP is needed */ | ||
2186 | drop_capsnap = 1; | ||
2187 | } | ||
2174 | dout("put_wrbuffer_cap_refs on %p cap_snap %p " | 2188 | dout("put_wrbuffer_cap_refs on %p cap_snap %p " |
2175 | " snap %lld %d/%d -> %d/%d %s%s\n", | 2189 | " snap %lld %d/%d -> %d/%d %s%s%s\n", |
2176 | inode, capsnap, capsnap->context->seq, | 2190 | inode, capsnap, capsnap->context->seq, |
2177 | ci->i_wrbuffer_ref+nr, capsnap->dirty_pages + nr, | 2191 | ci->i_wrbuffer_ref+nr, capsnap->dirty_pages + nr, |
2178 | ci->i_wrbuffer_ref, capsnap->dirty_pages, | 2192 | ci->i_wrbuffer_ref, capsnap->dirty_pages, |
2179 | last ? " (wrbuffer last)" : "", | 2193 | last ? " (wrbuffer last)" : "", |
2180 | last_snap ? " (capsnap last)" : ""); | 2194 | complete_capsnap ? " (complete capsnap)" : "", |
2195 | drop_capsnap ? " (drop capsnap)" : ""); | ||
2196 | if (drop_capsnap) { | ||
2197 | ceph_put_snap_context(capsnap->context); | ||
2198 | list_del(&capsnap->ci_item); | ||
2199 | list_del(&capsnap->flushing_item); | ||
2200 | ceph_put_cap_snap(capsnap); | ||
2201 | } | ||
2181 | } | 2202 | } |
2182 | 2203 | ||
2183 | spin_unlock(&inode->i_lock); | 2204 | spin_unlock(&inode->i_lock); |
@@ -2185,28 +2206,31 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, | |||
2185 | if (last) { | 2206 | if (last) { |
2186 | ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL); | 2207 | ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL); |
2187 | iput(inode); | 2208 | iput(inode); |
2188 | } else if (last_snap) { | 2209 | } else if (complete_capsnap) { |
2189 | ceph_flush_snaps(ci); | 2210 | ceph_flush_snaps(ci); |
2190 | wake_up(&ci->i_cap_wq); | 2211 | wake_up(&ci->i_cap_wq); |
2191 | } | 2212 | } |
2213 | if (drop_capsnap) | ||
2214 | iput(inode); | ||
2192 | } | 2215 | } |
2193 | 2216 | ||
2194 | /* | 2217 | /* |
2195 | * Handle a cap GRANT message from the MDS. (Note that a GRANT may | 2218 | * Handle a cap GRANT message from the MDS. (Note that a GRANT may |
2196 | * actually be a revocation if it specifies a smaller cap set.) | 2219 | * actually be a revocation if it specifies a smaller cap set.) |
2197 | * | 2220 | * |
2198 | * caller holds s_mutex. | 2221 | * caller holds s_mutex and i_lock, we drop both. |
2222 | * | ||
2199 | * return value: | 2223 | * return value: |
2200 | * 0 - ok | 2224 | * 0 - ok |
2201 | * 1 - check_caps on auth cap only (writeback) | 2225 | * 1 - check_caps on auth cap only (writeback) |
2202 | * 2 - check_caps (ack revoke) | 2226 | * 2 - check_caps (ack revoke) |
2203 | */ | 2227 | */ |
2204 | static int handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, | 2228 | static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, |
2205 | struct ceph_mds_session *session, | 2229 | struct ceph_mds_session *session, |
2206 | struct ceph_cap *cap, | 2230 | struct ceph_cap *cap, |
2207 | struct ceph_buffer *xattr_buf) | 2231 | struct ceph_buffer *xattr_buf) |
2208 | __releases(inode->i_lock) | 2232 | __releases(inode->i_lock) |
2209 | 2233 | __releases(session->s_mutex) | |
2210 | { | 2234 | { |
2211 | struct ceph_inode_info *ci = ceph_inode(inode); | 2235 | struct ceph_inode_info *ci = ceph_inode(inode); |
2212 | int mds = session->s_mds; | 2236 | int mds = session->s_mds; |
@@ -2216,7 +2240,7 @@ static int handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, | |||
2216 | u64 size = le64_to_cpu(grant->size); | 2240 | u64 size = le64_to_cpu(grant->size); |
2217 | u64 max_size = le64_to_cpu(grant->max_size); | 2241 | u64 max_size = le64_to_cpu(grant->max_size); |
2218 | struct timespec mtime, atime, ctime; | 2242 | struct timespec mtime, atime, ctime; |
2219 | int reply = 0; | 2243 | int check_caps = 0; |
2220 | int wake = 0; | 2244 | int wake = 0; |
2221 | int writeback = 0; | 2245 | int writeback = 0; |
2222 | int revoked_rdcache = 0; | 2246 | int revoked_rdcache = 0; |
@@ -2329,11 +2353,12 @@ static int handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, | |||
2329 | if ((used & ~newcaps) & CEPH_CAP_FILE_BUFFER) | 2353 | if ((used & ~newcaps) & CEPH_CAP_FILE_BUFFER) |
2330 | writeback = 1; /* will delay ack */ | 2354 | writeback = 1; /* will delay ack */ |
2331 | else if (dirty & ~newcaps) | 2355 | else if (dirty & ~newcaps) |
2332 | reply = 1; /* initiate writeback in check_caps */ | 2356 | check_caps = 1; /* initiate writeback in check_caps */ |
2333 | else if (((used & ~newcaps) & CEPH_CAP_FILE_CACHE) == 0 || | 2357 | else if (((used & ~newcaps) & CEPH_CAP_FILE_CACHE) == 0 || |
2334 | revoked_rdcache) | 2358 | revoked_rdcache) |
2335 | reply = 2; /* send revoke ack in check_caps */ | 2359 | check_caps = 2; /* send revoke ack in check_caps */ |
2336 | cap->issued = newcaps; | 2360 | cap->issued = newcaps; |
2361 | cap->implemented |= newcaps; | ||
2337 | } else if (cap->issued == newcaps) { | 2362 | } else if (cap->issued == newcaps) { |
2338 | dout("caps unchanged: %s -> %s\n", | 2363 | dout("caps unchanged: %s -> %s\n", |
2339 | ceph_cap_string(cap->issued), ceph_cap_string(newcaps)); | 2364 | ceph_cap_string(cap->issued), ceph_cap_string(newcaps)); |
@@ -2346,6 +2371,7 @@ static int handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, | |||
2346 | * pending revocation */ | 2371 | * pending revocation */ |
2347 | wake = 1; | 2372 | wake = 1; |
2348 | } | 2373 | } |
2374 | BUG_ON(cap->issued & ~cap->implemented); | ||
2349 | 2375 | ||
2350 | spin_unlock(&inode->i_lock); | 2376 | spin_unlock(&inode->i_lock); |
2351 | if (writeback) | 2377 | if (writeback) |
@@ -2359,7 +2385,14 @@ static int handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, | |||
2359 | ceph_queue_invalidate(inode); | 2385 | ceph_queue_invalidate(inode); |
2360 | if (wake) | 2386 | if (wake) |
2361 | wake_up(&ci->i_cap_wq); | 2387 | wake_up(&ci->i_cap_wq); |
2362 | return reply; | 2388 | |
2389 | if (check_caps == 1) | ||
2390 | ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_AUTHONLY, | ||
2391 | session); | ||
2392 | else if (check_caps == 2) | ||
2393 | ceph_check_caps(ci, CHECK_CAPS_NODELAY, session); | ||
2394 | else | ||
2395 | mutex_unlock(&session->s_mutex); | ||
2363 | } | 2396 | } |
2364 | 2397 | ||
2365 | /* | 2398 | /* |
@@ -2454,8 +2487,8 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid, | |||
2454 | break; | 2487 | break; |
2455 | } | 2488 | } |
2456 | WARN_ON(capsnap->dirty_pages || capsnap->writing); | 2489 | WARN_ON(capsnap->dirty_pages || capsnap->writing); |
2457 | dout(" removing cap_snap %p follows %lld\n", | 2490 | dout(" removing %p cap_snap %p follows %lld\n", |
2458 | capsnap, follows); | 2491 | inode, capsnap, follows); |
2459 | ceph_put_snap_context(capsnap->context); | 2492 | ceph_put_snap_context(capsnap->context); |
2460 | list_del(&capsnap->ci_item); | 2493 | list_del(&capsnap->ci_item); |
2461 | list_del(&capsnap->flushing_item); | 2494 | list_del(&capsnap->flushing_item); |
@@ -2548,9 +2581,8 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex, | |||
2548 | ci->i_cap_exporting_issued = cap->issued; | 2581 | ci->i_cap_exporting_issued = cap->issued; |
2549 | } | 2582 | } |
2550 | __ceph_remove_cap(cap); | 2583 | __ceph_remove_cap(cap); |
2551 | } else { | ||
2552 | WARN_ON(!cap); | ||
2553 | } | 2584 | } |
2585 | /* else, we already released it */ | ||
2554 | 2586 | ||
2555 | spin_unlock(&inode->i_lock); | 2587 | spin_unlock(&inode->i_lock); |
2556 | } | 2588 | } |
@@ -2621,9 +2653,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
2621 | u64 cap_id; | 2653 | u64 cap_id; |
2622 | u64 size, max_size; | 2654 | u64 size, max_size; |
2623 | u64 tid; | 2655 | u64 tid; |
2624 | int check_caps = 0; | ||
2625 | void *snaptrace; | 2656 | void *snaptrace; |
2626 | int r; | ||
2627 | 2657 | ||
2628 | dout("handle_caps from mds%d\n", mds); | 2658 | dout("handle_caps from mds%d\n", mds); |
2629 | 2659 | ||
@@ -2668,8 +2698,9 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
2668 | case CEPH_CAP_OP_IMPORT: | 2698 | case CEPH_CAP_OP_IMPORT: |
2669 | handle_cap_import(mdsc, inode, h, session, | 2699 | handle_cap_import(mdsc, inode, h, session, |
2670 | snaptrace, le32_to_cpu(h->snap_trace_len)); | 2700 | snaptrace, le32_to_cpu(h->snap_trace_len)); |
2671 | check_caps = 1; /* we may have sent a RELEASE to the old auth */ | 2701 | ceph_check_caps(ceph_inode(inode), CHECK_CAPS_NODELAY, |
2672 | goto done; | 2702 | session); |
2703 | goto done_unlocked; | ||
2673 | } | 2704 | } |
2674 | 2705 | ||
2675 | /* the rest require a cap */ | 2706 | /* the rest require a cap */ |
@@ -2686,16 +2717,8 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
2686 | switch (op) { | 2717 | switch (op) { |
2687 | case CEPH_CAP_OP_REVOKE: | 2718 | case CEPH_CAP_OP_REVOKE: |
2688 | case CEPH_CAP_OP_GRANT: | 2719 | case CEPH_CAP_OP_GRANT: |
2689 | r = handle_cap_grant(inode, h, session, cap, msg->middle); | 2720 | handle_cap_grant(inode, h, session, cap, msg->middle); |
2690 | if (r == 1) | 2721 | goto done_unlocked; |
2691 | ceph_check_caps(ceph_inode(inode), | ||
2692 | CHECK_CAPS_NODELAY|CHECK_CAPS_AUTHONLY, | ||
2693 | session); | ||
2694 | else if (r == 2) | ||
2695 | ceph_check_caps(ceph_inode(inode), | ||
2696 | CHECK_CAPS_NODELAY, | ||
2697 | session); | ||
2698 | break; | ||
2699 | 2722 | ||
2700 | case CEPH_CAP_OP_FLUSH_ACK: | 2723 | case CEPH_CAP_OP_FLUSH_ACK: |
2701 | handle_cap_flush_ack(inode, tid, h, session, cap); | 2724 | handle_cap_flush_ack(inode, tid, h, session, cap); |
@@ -2713,9 +2736,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
2713 | 2736 | ||
2714 | done: | 2737 | done: |
2715 | mutex_unlock(&session->s_mutex); | 2738 | mutex_unlock(&session->s_mutex); |
2716 | 2739 | done_unlocked: | |
2717 | if (check_caps) | ||
2718 | ceph_check_caps(ceph_inode(inode), CHECK_CAPS_NODELAY, NULL); | ||
2719 | if (inode) | 2740 | if (inode) |
2720 | iput(inode); | 2741 | iput(inode); |
2721 | return; | 2742 | return; |
@@ -2838,11 +2859,18 @@ int ceph_encode_inode_release(void **p, struct inode *inode, | |||
2838 | struct ceph_cap *cap; | 2859 | struct ceph_cap *cap; |
2839 | struct ceph_mds_request_release *rel = *p; | 2860 | struct ceph_mds_request_release *rel = *p; |
2840 | int ret = 0; | 2861 | int ret = 0; |
2841 | 2862 | int used = 0; | |
2842 | dout("encode_inode_release %p mds%d drop %s unless %s\n", inode, | ||
2843 | mds, ceph_cap_string(drop), ceph_cap_string(unless)); | ||
2844 | 2863 | ||
2845 | spin_lock(&inode->i_lock); | 2864 | spin_lock(&inode->i_lock); |
2865 | used = __ceph_caps_used(ci); | ||
2866 | |||
2867 | dout("encode_inode_release %p mds%d used %s drop %s unless %s\n", inode, | ||
2868 | mds, ceph_cap_string(used), ceph_cap_string(drop), | ||
2869 | ceph_cap_string(unless)); | ||
2870 | |||
2871 | /* only drop unused caps */ | ||
2872 | drop &= ~used; | ||
2873 | |||
2846 | cap = __get_cap_for_mds(ci, mds); | 2874 | cap = __get_cap_for_mds(ci, mds); |
2847 | if (cap && __cap_is_valid(cap)) { | 2875 | if (cap && __cap_is_valid(cap)) { |
2848 | if (force || | 2876 | if (force || |