about | summary | refs | log | tree | commit | diff | stats
path: root/fs/ceph/caps.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ceph/caps.c')
-rw-r--r-- fs/ceph/caps.c 108
1 files changed, 57 insertions, 51 deletions
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index a2069b6680ae..98ab13e2b71d 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1,4 +1,4 @@
1#include "ceph_debug.h" 1#include <linux/ceph/ceph_debug.h>
2 2
3#include <linux/fs.h> 3#include <linux/fs.h>
4#include <linux/kernel.h> 4#include <linux/kernel.h>
@@ -9,8 +9,9 @@
9#include <linux/writeback.h> 9#include <linux/writeback.h>
10 10
11#include "super.h" 11#include "super.h"
12#include "decode.h" 12#include "mds_client.h"
13#include "messenger.h" 13#include <linux/ceph/decode.h>
14#include <linux/ceph/messenger.h>
14 15
15/* 16/*
16 * Capability management 17 * Capability management
@@ -287,11 +288,11 @@ void ceph_put_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap)
287 spin_unlock(&mdsc->caps_list_lock); 288 spin_unlock(&mdsc->caps_list_lock);
288} 289}
289 290
290void ceph_reservation_status(struct ceph_client *client, 291void ceph_reservation_status(struct ceph_fs_client *fsc,
291 int *total, int *avail, int *used, int *reserved, 292 int *total, int *avail, int *used, int *reserved,
292 int *min) 293 int *min)
293{ 294{
294 struct ceph_mds_client *mdsc = &client->mdsc; 295 struct ceph_mds_client *mdsc = fsc->mdsc;
295 296
296 if (total) 297 if (total)
297 *total = mdsc->caps_total_count; 298 *total = mdsc->caps_total_count;
@@ -399,7 +400,7 @@ static void __insert_cap_node(struct ceph_inode_info *ci,
399static void __cap_set_timeouts(struct ceph_mds_client *mdsc, 400static void __cap_set_timeouts(struct ceph_mds_client *mdsc,
400 struct ceph_inode_info *ci) 401 struct ceph_inode_info *ci)
401{ 402{
402 struct ceph_mount_args *ma = mdsc->client->mount_args; 403 struct ceph_mount_options *ma = mdsc->fsc->mount_options;
403 404
404 ci->i_hold_caps_min = round_jiffies(jiffies + 405 ci->i_hold_caps_min = round_jiffies(jiffies +
405 ma->caps_wanted_delay_min * HZ); 406 ma->caps_wanted_delay_min * HZ);
@@ -515,7 +516,7 @@ int ceph_add_cap(struct inode *inode,
515 unsigned seq, unsigned mseq, u64 realmino, int flags, 516 unsigned seq, unsigned mseq, u64 realmino, int flags,
516 struct ceph_cap_reservation *caps_reservation) 517 struct ceph_cap_reservation *caps_reservation)
517{ 518{
518 struct ceph_mds_client *mdsc = &ceph_inode_to_client(inode)->mdsc; 519 struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
519 struct ceph_inode_info *ci = ceph_inode(inode); 520 struct ceph_inode_info *ci = ceph_inode(inode);
520 struct ceph_cap *new_cap = NULL; 521 struct ceph_cap *new_cap = NULL;
521 struct ceph_cap *cap; 522 struct ceph_cap *cap;
@@ -814,7 +815,7 @@ int __ceph_caps_used(struct ceph_inode_info *ci)
814 used |= CEPH_CAP_PIN; 815 used |= CEPH_CAP_PIN;
815 if (ci->i_rd_ref) 816 if (ci->i_rd_ref)
816 used |= CEPH_CAP_FILE_RD; 817 used |= CEPH_CAP_FILE_RD;
817 if (ci->i_rdcache_ref || ci->i_rdcache_gen) 818 if (ci->i_rdcache_ref || ci->vfs_inode.i_data.nrpages)
818 used |= CEPH_CAP_FILE_CACHE; 819 used |= CEPH_CAP_FILE_CACHE;
819 if (ci->i_wr_ref) 820 if (ci->i_wr_ref)
820 used |= CEPH_CAP_FILE_WR; 821 used |= CEPH_CAP_FILE_WR;
@@ -873,7 +874,7 @@ void __ceph_remove_cap(struct ceph_cap *cap)
873 struct ceph_mds_session *session = cap->session; 874 struct ceph_mds_session *session = cap->session;
874 struct ceph_inode_info *ci = cap->ci; 875 struct ceph_inode_info *ci = cap->ci;
875 struct ceph_mds_client *mdsc = 876 struct ceph_mds_client *mdsc =
876 &ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; 877 ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
877 int removed = 0; 878 int removed = 0;
878 879
879 dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode); 880 dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode);
@@ -1195,10 +1196,14 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
1195 * asynchronously back to the MDS once sync writes complete and dirty 1196 * asynchronously back to the MDS once sync writes complete and dirty
1196 * data is written out. 1197 * data is written out.
1197 * 1198 *
1199 * Unless @again is true, skip cap_snaps that were already sent to
1200 * the MDS (i.e., during this session).
1201 *
1198 * Called under i_lock. Takes s_mutex as needed. 1202 * Called under i_lock. Takes s_mutex as needed.
1199 */ 1203 */
1200void __ceph_flush_snaps(struct ceph_inode_info *ci, 1204void __ceph_flush_snaps(struct ceph_inode_info *ci,
1201 struct ceph_mds_session **psession) 1205 struct ceph_mds_session **psession,
1206 int again)
1202 __releases(ci->vfs_inode->i_lock) 1207 __releases(ci->vfs_inode->i_lock)
1203 __acquires(ci->vfs_inode->i_lock) 1208 __acquires(ci->vfs_inode->i_lock)
1204{ 1209{
@@ -1206,7 +1211,7 @@ void __ceph_flush_snaps(struct ceph_inode_info *ci,
1206 int mds; 1211 int mds;
1207 struct ceph_cap_snap *capsnap; 1212 struct ceph_cap_snap *capsnap;
1208 u32 mseq; 1213 u32 mseq;
1209 struct ceph_mds_client *mdsc = &ceph_inode_to_client(inode)->mdsc; 1214 struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
1210 struct ceph_mds_session *session = NULL; /* if session != NULL, we hold 1215 struct ceph_mds_session *session = NULL; /* if session != NULL, we hold
1211 session->s_mutex */ 1216 session->s_mutex */
1212 u64 next_follows = 0; /* keep track of how far we've gotten through the 1217 u64 next_follows = 0; /* keep track of how far we've gotten through the
@@ -1227,7 +1232,7 @@ retry:
1227 * pages to be written out. 1232 * pages to be written out.
1228 */ 1233 */
1229 if (capsnap->dirty_pages || capsnap->writing) 1234 if (capsnap->dirty_pages || capsnap->writing)
1230 continue; 1235 break;
1231 1236
1232 /* 1237 /*
1233 * if cap writeback already occurred, we should have dropped 1238 * if cap writeback already occurred, we should have dropped
@@ -1240,6 +1245,13 @@ retry:
1240 dout("no auth cap (migrating?), doing nothing\n"); 1245 dout("no auth cap (migrating?), doing nothing\n");
1241 goto out; 1246 goto out;
1242 } 1247 }
1248
1249 /* only flush each capsnap once */
1250 if (!again && !list_empty(&capsnap->flushing_item)) {
1251 dout("already flushed %p, skipping\n", capsnap);
1252 continue;
1253 }
1254
1243 mds = ci->i_auth_cap->session->s_mds; 1255 mds = ci->i_auth_cap->session->s_mds;
1244 mseq = ci->i_auth_cap->mseq; 1256 mseq = ci->i_auth_cap->mseq;
1245 1257
@@ -1276,8 +1288,8 @@ retry:
1276 &session->s_cap_snaps_flushing); 1288 &session->s_cap_snaps_flushing);
1277 spin_unlock(&inode->i_lock); 1289 spin_unlock(&inode->i_lock);
1278 1290
1279 dout("flush_snaps %p cap_snap %p follows %lld size %llu\n", 1291 dout("flush_snaps %p cap_snap %p follows %lld tid %llu\n",
1280 inode, capsnap, next_follows, capsnap->size); 1292 inode, capsnap, capsnap->follows, capsnap->flush_tid);
1281 send_cap_msg(session, ceph_vino(inode).ino, 0, 1293 send_cap_msg(session, ceph_vino(inode).ino, 0,
1282 CEPH_CAP_OP_FLUSHSNAP, capsnap->issued, 0, 1294 CEPH_CAP_OP_FLUSHSNAP, capsnap->issued, 0,
1283 capsnap->dirty, 0, capsnap->flush_tid, 0, mseq, 1295 capsnap->dirty, 0, capsnap->flush_tid, 0, mseq,
@@ -1314,7 +1326,7 @@ static void ceph_flush_snaps(struct ceph_inode_info *ci)
1314 struct inode *inode = &ci->vfs_inode; 1326 struct inode *inode = &ci->vfs_inode;
1315 1327
1316 spin_lock(&inode->i_lock); 1328 spin_lock(&inode->i_lock);
1317 __ceph_flush_snaps(ci, NULL); 1329 __ceph_flush_snaps(ci, NULL, 0);
1318 spin_unlock(&inode->i_lock); 1330 spin_unlock(&inode->i_lock);
1319} 1331}
1320 1332
@@ -1325,7 +1337,7 @@ static void ceph_flush_snaps(struct ceph_inode_info *ci)
1325void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) 1337void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
1326{ 1338{
1327 struct ceph_mds_client *mdsc = 1339 struct ceph_mds_client *mdsc =
1328 &ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; 1340 ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
1329 struct inode *inode = &ci->vfs_inode; 1341 struct inode *inode = &ci->vfs_inode;
1330 int was = ci->i_dirty_caps; 1342 int was = ci->i_dirty_caps;
1331 int dirty = 0; 1343 int dirty = 0;
@@ -1367,7 +1379,7 @@ void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
1367static int __mark_caps_flushing(struct inode *inode, 1379static int __mark_caps_flushing(struct inode *inode,
1368 struct ceph_mds_session *session) 1380 struct ceph_mds_session *session)
1369{ 1381{
1370 struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc; 1382 struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
1371 struct ceph_inode_info *ci = ceph_inode(inode); 1383 struct ceph_inode_info *ci = ceph_inode(inode);
1372 int flushing; 1384 int flushing;
1373 1385
@@ -1405,17 +1417,6 @@ static int __mark_caps_flushing(struct inode *inode,
1405/* 1417/*
1406 * try to invalidate mapping pages without blocking. 1418 * try to invalidate mapping pages without blocking.
1407 */ 1419 */
1408static int mapping_is_empty(struct address_space *mapping)
1409{
1410 struct page *page = find_get_page(mapping, 0);
1411
1412 if (!page)
1413 return 1;
1414
1415 put_page(page);
1416 return 0;
1417}
1418
1419static int try_nonblocking_invalidate(struct inode *inode) 1420static int try_nonblocking_invalidate(struct inode *inode)
1420{ 1421{
1421 struct ceph_inode_info *ci = ceph_inode(inode); 1422 struct ceph_inode_info *ci = ceph_inode(inode);
@@ -1425,7 +1426,7 @@ static int try_nonblocking_invalidate(struct inode *inode)
1425 invalidate_mapping_pages(&inode->i_data, 0, -1); 1426 invalidate_mapping_pages(&inode->i_data, 0, -1);
1426 spin_lock(&inode->i_lock); 1427 spin_lock(&inode->i_lock);
1427 1428
1428 if (mapping_is_empty(&inode->i_data) && 1429 if (inode->i_data.nrpages == 0 &&
1429 invalidating_gen == ci->i_rdcache_gen) { 1430 invalidating_gen == ci->i_rdcache_gen) {
1430 /* success. */ 1431 /* success. */
1431 dout("try_nonblocking_invalidate %p success\n", inode); 1432 dout("try_nonblocking_invalidate %p success\n", inode);
@@ -1451,8 +1452,8 @@ static int try_nonblocking_invalidate(struct inode *inode)
1451void ceph_check_caps(struct ceph_inode_info *ci, int flags, 1452void ceph_check_caps(struct ceph_inode_info *ci, int flags,
1452 struct ceph_mds_session *session) 1453 struct ceph_mds_session *session)
1453{ 1454{
1454 struct ceph_client *client = ceph_inode_to_client(&ci->vfs_inode); 1455 struct ceph_fs_client *fsc = ceph_inode_to_client(&ci->vfs_inode);
1455 struct ceph_mds_client *mdsc = &client->mdsc; 1456 struct ceph_mds_client *mdsc = fsc->mdsc;
1456 struct inode *inode = &ci->vfs_inode; 1457 struct inode *inode = &ci->vfs_inode;
1457 struct ceph_cap *cap; 1458 struct ceph_cap *cap;
1458 int file_wanted, used; 1459 int file_wanted, used;
@@ -1477,7 +1478,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
1477 1478
1478 /* flush snaps first time around only */ 1479 /* flush snaps first time around only */
1479 if (!list_empty(&ci->i_cap_snaps)) 1480 if (!list_empty(&ci->i_cap_snaps))
1480 __ceph_flush_snaps(ci, &session); 1481 __ceph_flush_snaps(ci, &session, 0);
1481 goto retry_locked; 1482 goto retry_locked;
1482retry: 1483retry:
1483 spin_lock(&inode->i_lock); 1484 spin_lock(&inode->i_lock);
@@ -1522,7 +1523,7 @@ retry_locked:
1522 */ 1523 */
1523 if ((!is_delayed || mdsc->stopping) && 1524 if ((!is_delayed || mdsc->stopping) &&
1524 ci->i_wrbuffer_ref == 0 && /* no dirty pages... */ 1525 ci->i_wrbuffer_ref == 0 && /* no dirty pages... */
1525 ci->i_rdcache_gen && /* may have cached pages */ 1526 inode->i_data.nrpages && /* have cached pages */
1526 (file_wanted == 0 || /* no open files */ 1527 (file_wanted == 0 || /* no open files */
1527 (revoking & (CEPH_CAP_FILE_CACHE| 1528 (revoking & (CEPH_CAP_FILE_CACHE|
1528 CEPH_CAP_FILE_LAZYIO))) && /* or revoking cache */ 1529 CEPH_CAP_FILE_LAZYIO))) && /* or revoking cache */
@@ -1695,7 +1696,7 @@ ack:
1695static int try_flush_caps(struct inode *inode, struct ceph_mds_session *session, 1696static int try_flush_caps(struct inode *inode, struct ceph_mds_session *session,
1696 unsigned *flush_tid) 1697 unsigned *flush_tid)
1697{ 1698{
1698 struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc; 1699 struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
1699 struct ceph_inode_info *ci = ceph_inode(inode); 1700 struct ceph_inode_info *ci = ceph_inode(inode);
1700 int unlock_session = session ? 0 : 1; 1701 int unlock_session = session ? 0 : 1;
1701 int flushing = 0; 1702 int flushing = 0;
@@ -1861,7 +1862,7 @@ int ceph_write_inode(struct inode *inode, struct writeback_control *wbc)
1861 caps_are_flushed(inode, flush_tid)); 1862 caps_are_flushed(inode, flush_tid));
1862 } else { 1863 } else {
1863 struct ceph_mds_client *mdsc = 1864 struct ceph_mds_client *mdsc =
1864 &ceph_sb_to_client(inode->i_sb)->mdsc; 1865 ceph_sb_to_client(inode->i_sb)->mdsc;
1865 1866
1866 spin_lock(&inode->i_lock); 1867 spin_lock(&inode->i_lock);
1867 if (__ceph_caps_dirty(ci)) 1868 if (__ceph_caps_dirty(ci))
@@ -1894,7 +1895,7 @@ static void kick_flushing_capsnaps(struct ceph_mds_client *mdsc,
1894 if (cap && cap->session == session) { 1895 if (cap && cap->session == session) {
1895 dout("kick_flushing_caps %p cap %p capsnap %p\n", inode, 1896 dout("kick_flushing_caps %p cap %p capsnap %p\n", inode,
1896 cap, capsnap); 1897 cap, capsnap);
1897 __ceph_flush_snaps(ci, &session); 1898 __ceph_flush_snaps(ci, &session, 1);
1898 } else { 1899 } else {
1899 pr_err("%p auth cap %p not mds%d ???\n", inode, 1900 pr_err("%p auth cap %p not mds%d ???\n", inode,
1900 cap, session->s_mds); 1901 cap, session->s_mds);
@@ -2272,7 +2273,8 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
2272{ 2273{
2273 struct ceph_inode_info *ci = ceph_inode(inode); 2274 struct ceph_inode_info *ci = ceph_inode(inode);
2274 int mds = session->s_mds; 2275 int mds = session->s_mds;
2275 int seq = le32_to_cpu(grant->seq); 2276 unsigned seq = le32_to_cpu(grant->seq);
2277 unsigned issue_seq = le32_to_cpu(grant->issue_seq);
2276 int newcaps = le32_to_cpu(grant->caps); 2278 int newcaps = le32_to_cpu(grant->caps);
2277 int issued, implemented, used, wanted, dirty; 2279 int issued, implemented, used, wanted, dirty;
2278 u64 size = le64_to_cpu(grant->size); 2280 u64 size = le64_to_cpu(grant->size);
@@ -2284,8 +2286,8 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
2284 int revoked_rdcache = 0; 2286 int revoked_rdcache = 0;
2285 int queue_invalidate = 0; 2287 int queue_invalidate = 0;
2286 2288
2287 dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n", 2289 dout("handle_cap_grant inode %p cap %p mds%d seq %u/%u %s\n",
2288 inode, cap, mds, seq, ceph_cap_string(newcaps)); 2290 inode, cap, mds, seq, issue_seq, ceph_cap_string(newcaps));
2289 dout(" size %llu max_size %llu, i_size %llu\n", size, max_size, 2291 dout(" size %llu max_size %llu, i_size %llu\n", size, max_size,
2290 inode->i_size); 2292 inode->i_size);
2291 2293
@@ -2381,6 +2383,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
2381 } 2383 }
2382 2384
2383 cap->seq = seq; 2385 cap->seq = seq;
2386 cap->issue_seq = issue_seq;
2384 2387
2385 /* file layout may have changed */ 2388 /* file layout may have changed */
2386 ci->i_layout = grant->layout; 2389 ci->i_layout = grant->layout;
@@ -2452,7 +2455,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
2452 __releases(inode->i_lock) 2455 __releases(inode->i_lock)
2453{ 2456{
2454 struct ceph_inode_info *ci = ceph_inode(inode); 2457 struct ceph_inode_info *ci = ceph_inode(inode);
2455 struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc; 2458 struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
2456 unsigned seq = le32_to_cpu(m->seq); 2459 unsigned seq = le32_to_cpu(m->seq);
2457 int dirty = le32_to_cpu(m->dirty); 2460 int dirty = le32_to_cpu(m->dirty);
2458 int cleaned = 0; 2461 int cleaned = 0;
@@ -2700,7 +2703,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
2700 struct ceph_msg *msg) 2703 struct ceph_msg *msg)
2701{ 2704{
2702 struct ceph_mds_client *mdsc = session->s_mdsc; 2705 struct ceph_mds_client *mdsc = session->s_mdsc;
2703 struct super_block *sb = mdsc->client->sb; 2706 struct super_block *sb = mdsc->fsc->sb;
2704 struct inode *inode; 2707 struct inode *inode;
2705 struct ceph_cap *cap; 2708 struct ceph_cap *cap;
2706 struct ceph_mds_caps *h; 2709 struct ceph_mds_caps *h;
@@ -2763,15 +2766,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
2763 if (op == CEPH_CAP_OP_IMPORT) 2766 if (op == CEPH_CAP_OP_IMPORT)
2764 __queue_cap_release(session, vino.ino, cap_id, 2767 __queue_cap_release(session, vino.ino, cap_id,
2765 mseq, seq); 2768 mseq, seq);
2766 2769 goto flush_cap_releases;
2767 /*
2768 * send any full release message to try to move things
2769 * along for the mds (who clearly thinks we still have this
2770 * cap).
2771 */
2772 ceph_add_cap_releases(mdsc, session);
2773 ceph_send_cap_releases(mdsc, session);
2774 goto done;
2775 } 2770 }
2776 2771
2777 /* these will work even if we don't have a cap yet */ 2772 /* these will work even if we don't have a cap yet */
@@ -2799,7 +2794,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
2799 dout(" no cap on %p ino %llx.%llx from mds%d\n", 2794 dout(" no cap on %p ino %llx.%llx from mds%d\n",
2800 inode, ceph_ino(inode), ceph_snap(inode), mds); 2795 inode, ceph_ino(inode), ceph_snap(inode), mds);
2801 spin_unlock(&inode->i_lock); 2796 spin_unlock(&inode->i_lock);
2802 goto done; 2797 goto flush_cap_releases;
2803 } 2798 }
2804 2799
2805 /* note that each of these drops i_lock for us */ 2800 /* note that each of these drops i_lock for us */
@@ -2823,6 +2818,17 @@ void ceph_handle_caps(struct ceph_mds_session *session,
2823 ceph_cap_op_name(op)); 2818 ceph_cap_op_name(op));
2824 } 2819 }
2825 2820
2821 goto done;
2822
2823flush_cap_releases:
2824 /*
2825 * send any full release message to try to move things
2826 * along for the mds (who clearly thinks we still have this
2827 * cap).
2828 */
2829 ceph_add_cap_releases(mdsc, session);
2830 ceph_send_cap_releases(mdsc, session);
2831
2826done: 2832done:
2827 mutex_unlock(&session->s_mutex); 2833 mutex_unlock(&session->s_mutex);
2828done_unlocked: 2834done_unlocked: