diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2010-09-21 14:20:10 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2010-09-21 14:20:10 -0400 |
commit | 19746cad00a00a7a2e3eb0640d317d6e7c2e8cc0 (patch) | |
tree | 3dfd66dce28b3b02f7e293b28b57191ca23f691c | |
parent | 0ffe37de76a57ba38d960e370c2f8f1d799c94a1 (diff) | |
parent | be4f104dfd3b5e3ae262bff607965cfc38027dec (diff) |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client:
  ceph: select CRYPTO
  ceph: check mapping to determine if FILE_CACHE cap is used
  ceph: only send one flushsnap per cap_snap per mds session
  ceph: fix cap_snap and realm split
  ceph: stop sending FLUSHSNAPs when we hit a dirty capsnap
  ceph: correctly set 'follows' in flushsnap messages
  ceph: fix dn offset during readdir_prepopulate
  ceph: fix file offset wrapping at 4GB on 32-bit archs
  ceph: fix reconnect encoding for old servers
  ceph: fix pagelist kunmap tail
  ceph: fix null pointer deref on anon root dentry release
-rw-r--r-- | fs/ceph/Kconfig | 1 | ||||
-rw-r--r-- | fs/ceph/addr.c | 7 | ||||
-rw-r--r-- | fs/ceph/caps.c | 27 | ||||
-rw-r--r-- | fs/ceph/dir.c | 10 | ||||
-rw-r--r-- | fs/ceph/inode.c | 11 | ||||
-rw-r--r-- | fs/ceph/mds_client.c | 2 | ||||
-rw-r--r-- | fs/ceph/pagelist.c | 12 | ||||
-rw-r--r-- | fs/ceph/snap.c | 92 | ||||
-rw-r--r-- | fs/ceph/super.h | 5 |
9 files changed, 84 insertions, 83 deletions
diff --git a/fs/ceph/Kconfig b/fs/ceph/Kconfig index bc87b9c1d27e..0fcd2640c23f 100644 --- a/fs/ceph/Kconfig +++ b/fs/ceph/Kconfig | |||
@@ -3,6 +3,7 @@ config CEPH_FS | |||
3 | depends on INET && EXPERIMENTAL | 3 | depends on INET && EXPERIMENTAL |
4 | select LIBCRC32C | 4 | select LIBCRC32C |
5 | select CRYPTO_AES | 5 | select CRYPTO_AES |
6 | select CRYPTO | ||
6 | help | 7 | help |
7 | Choose Y or M here to include support for mounting the | 8 | Choose Y or M here to include support for mounting the |
8 | experimental Ceph distributed file system. Ceph is an extremely | 9 | experimental Ceph distributed file system. Ceph is an extremely |
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 4cfce1ee31fa..efbc604001c8 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c | |||
@@ -411,8 +411,8 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) | |||
411 | if (i_size < page_off + len) | 411 | if (i_size < page_off + len) |
412 | len = i_size - page_off; | 412 | len = i_size - page_off; |
413 | 413 | ||
414 | dout("writepage %p page %p index %lu on %llu~%u\n", | 414 | dout("writepage %p page %p index %lu on %llu~%u snapc %p\n", |
415 | inode, page, page->index, page_off, len); | 415 | inode, page, page->index, page_off, len, snapc); |
416 | 416 | ||
417 | writeback_stat = atomic_long_inc_return(&client->writeback_count); | 417 | writeback_stat = atomic_long_inc_return(&client->writeback_count); |
418 | if (writeback_stat > | 418 | if (writeback_stat > |
@@ -766,7 +766,8 @@ get_more_pages: | |||
766 | /* ok */ | 766 | /* ok */ |
767 | if (locked_pages == 0) { | 767 | if (locked_pages == 0) { |
768 | /* prepare async write request */ | 768 | /* prepare async write request */ |
769 | offset = page->index << PAGE_CACHE_SHIFT; | 769 | offset = (unsigned long long)page->index |
770 | << PAGE_CACHE_SHIFT; | ||
770 | len = wsize; | 771 | len = wsize; |
771 | req = ceph_osdc_new_request(&client->osdc, | 772 | req = ceph_osdc_new_request(&client->osdc, |
772 | &ci->i_layout, | 773 | &ci->i_layout, |
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index a2069b6680ae..73c153092f72 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
@@ -814,7 +814,7 @@ int __ceph_caps_used(struct ceph_inode_info *ci) | |||
814 | used |= CEPH_CAP_PIN; | 814 | used |= CEPH_CAP_PIN; |
815 | if (ci->i_rd_ref) | 815 | if (ci->i_rd_ref) |
816 | used |= CEPH_CAP_FILE_RD; | 816 | used |= CEPH_CAP_FILE_RD; |
817 | if (ci->i_rdcache_ref || ci->i_rdcache_gen) | 817 | if (ci->i_rdcache_ref || ci->vfs_inode.i_data.nrpages) |
818 | used |= CEPH_CAP_FILE_CACHE; | 818 | used |= CEPH_CAP_FILE_CACHE; |
819 | if (ci->i_wr_ref) | 819 | if (ci->i_wr_ref) |
820 | used |= CEPH_CAP_FILE_WR; | 820 | used |= CEPH_CAP_FILE_WR; |
@@ -1195,10 +1195,14 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, | |||
1195 | * asynchronously back to the MDS once sync writes complete and dirty | 1195 | * asynchronously back to the MDS once sync writes complete and dirty |
1196 | * data is written out. | 1196 | * data is written out. |
1197 | * | 1197 | * |
1198 | * Unless @again is true, skip cap_snaps that were already sent to | ||
1199 | * the MDS (i.e., during this session). | ||
1200 | * | ||
1198 | * Called under i_lock. Takes s_mutex as needed. | 1201 | * Called under i_lock. Takes s_mutex as needed. |
1199 | */ | 1202 | */ |
1200 | void __ceph_flush_snaps(struct ceph_inode_info *ci, | 1203 | void __ceph_flush_snaps(struct ceph_inode_info *ci, |
1201 | struct ceph_mds_session **psession) | 1204 | struct ceph_mds_session **psession, |
1205 | int again) | ||
1202 | __releases(ci->vfs_inode->i_lock) | 1206 | __releases(ci->vfs_inode->i_lock) |
1203 | __acquires(ci->vfs_inode->i_lock) | 1207 | __acquires(ci->vfs_inode->i_lock) |
1204 | { | 1208 | { |
@@ -1227,7 +1231,7 @@ retry: | |||
1227 | * pages to be written out. | 1231 | * pages to be written out. |
1228 | */ | 1232 | */ |
1229 | if (capsnap->dirty_pages || capsnap->writing) | 1233 | if (capsnap->dirty_pages || capsnap->writing) |
1230 | continue; | 1234 | break; |
1231 | 1235 | ||
1232 | /* | 1236 | /* |
1233 | * if cap writeback already occurred, we should have dropped | 1237 | * if cap writeback already occurred, we should have dropped |
@@ -1240,6 +1244,13 @@ retry: | |||
1240 | dout("no auth cap (migrating?), doing nothing\n"); | 1244 | dout("no auth cap (migrating?), doing nothing\n"); |
1241 | goto out; | 1245 | goto out; |
1242 | } | 1246 | } |
1247 | |||
1248 | /* only flush each capsnap once */ | ||
1249 | if (!again && !list_empty(&capsnap->flushing_item)) { | ||
1250 | dout("already flushed %p, skipping\n", capsnap); | ||
1251 | continue; | ||
1252 | } | ||
1253 | |||
1243 | mds = ci->i_auth_cap->session->s_mds; | 1254 | mds = ci->i_auth_cap->session->s_mds; |
1244 | mseq = ci->i_auth_cap->mseq; | 1255 | mseq = ci->i_auth_cap->mseq; |
1245 | 1256 | ||
@@ -1276,8 +1287,8 @@ retry: | |||
1276 | &session->s_cap_snaps_flushing); | 1287 | &session->s_cap_snaps_flushing); |
1277 | spin_unlock(&inode->i_lock); | 1288 | spin_unlock(&inode->i_lock); |
1278 | 1289 | ||
1279 | dout("flush_snaps %p cap_snap %p follows %lld size %llu\n", | 1290 | dout("flush_snaps %p cap_snap %p follows %lld tid %llu\n", |
1280 | inode, capsnap, next_follows, capsnap->size); | 1291 | inode, capsnap, capsnap->follows, capsnap->flush_tid); |
1281 | send_cap_msg(session, ceph_vino(inode).ino, 0, | 1292 | send_cap_msg(session, ceph_vino(inode).ino, 0, |
1282 | CEPH_CAP_OP_FLUSHSNAP, capsnap->issued, 0, | 1293 | CEPH_CAP_OP_FLUSHSNAP, capsnap->issued, 0, |
1283 | capsnap->dirty, 0, capsnap->flush_tid, 0, mseq, | 1294 | capsnap->dirty, 0, capsnap->flush_tid, 0, mseq, |
@@ -1314,7 +1325,7 @@ static void ceph_flush_snaps(struct ceph_inode_info *ci) | |||
1314 | struct inode *inode = &ci->vfs_inode; | 1325 | struct inode *inode = &ci->vfs_inode; |
1315 | 1326 | ||
1316 | spin_lock(&inode->i_lock); | 1327 | spin_lock(&inode->i_lock); |
1317 | __ceph_flush_snaps(ci, NULL); | 1328 | __ceph_flush_snaps(ci, NULL, 0); |
1318 | spin_unlock(&inode->i_lock); | 1329 | spin_unlock(&inode->i_lock); |
1319 | } | 1330 | } |
1320 | 1331 | ||
@@ -1477,7 +1488,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags, | |||
1477 | 1488 | ||
1478 | /* flush snaps first time around only */ | 1489 | /* flush snaps first time around only */ |
1479 | if (!list_empty(&ci->i_cap_snaps)) | 1490 | if (!list_empty(&ci->i_cap_snaps)) |
1480 | __ceph_flush_snaps(ci, &session); | 1491 | __ceph_flush_snaps(ci, &session, 0); |
1481 | goto retry_locked; | 1492 | goto retry_locked; |
1482 | retry: | 1493 | retry: |
1483 | spin_lock(&inode->i_lock); | 1494 | spin_lock(&inode->i_lock); |
@@ -1894,7 +1905,7 @@ static void kick_flushing_capsnaps(struct ceph_mds_client *mdsc, | |||
1894 | if (cap && cap->session == session) { | 1905 | if (cap && cap->session == session) { |
1895 | dout("kick_flushing_caps %p cap %p capsnap %p\n", inode, | 1906 | dout("kick_flushing_caps %p cap %p capsnap %p\n", inode, |
1896 | cap, capsnap); | 1907 | cap, capsnap); |
1897 | __ceph_flush_snaps(ci, &session); | 1908 | __ceph_flush_snaps(ci, &session, 1); |
1898 | } else { | 1909 | } else { |
1899 | pr_err("%p auth cap %p not mds%d ???\n", inode, | 1910 | pr_err("%p auth cap %p not mds%d ???\n", inode, |
1900 | cap, session->s_mds); | 1911 | cap, session->s_mds); |
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 6e4f43ff23ec..a1986eb52045 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c | |||
@@ -1021,11 +1021,15 @@ out_touch: | |||
1021 | static void ceph_dentry_release(struct dentry *dentry) | 1021 | static void ceph_dentry_release(struct dentry *dentry) |
1022 | { | 1022 | { |
1023 | struct ceph_dentry_info *di = ceph_dentry(dentry); | 1023 | struct ceph_dentry_info *di = ceph_dentry(dentry); |
1024 | struct inode *parent_inode = dentry->d_parent->d_inode; | 1024 | struct inode *parent_inode = NULL; |
1025 | u64 snapid = ceph_snap(parent_inode); | 1025 | u64 snapid = CEPH_NOSNAP; |
1026 | 1026 | ||
1027 | if (!IS_ROOT(dentry)) { | ||
1028 | parent_inode = dentry->d_parent->d_inode; | ||
1029 | if (parent_inode) | ||
1030 | snapid = ceph_snap(parent_inode); | ||
1031 | } | ||
1027 | dout("dentry_release %p parent %p\n", dentry, parent_inode); | 1032 | dout("dentry_release %p parent %p\n", dentry, parent_inode); |
1028 | |||
1029 | if (parent_inode && snapid != CEPH_SNAPDIR) { | 1033 | if (parent_inode && snapid != CEPH_SNAPDIR) { |
1030 | struct ceph_inode_info *ci = ceph_inode(parent_inode); | 1034 | struct ceph_inode_info *ci = ceph_inode(parent_inode); |
1031 | 1035 | ||
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index e7cca414da03..62377ec37edf 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c | |||
@@ -845,7 +845,7 @@ static void ceph_set_dentry_offset(struct dentry *dn) | |||
845 | * the caller) if we fail. | 845 | * the caller) if we fail. |
846 | */ | 846 | */ |
847 | static struct dentry *splice_dentry(struct dentry *dn, struct inode *in, | 847 | static struct dentry *splice_dentry(struct dentry *dn, struct inode *in, |
848 | bool *prehash) | 848 | bool *prehash, bool set_offset) |
849 | { | 849 | { |
850 | struct dentry *realdn; | 850 | struct dentry *realdn; |
851 | 851 | ||
@@ -877,7 +877,8 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in, | |||
877 | } | 877 | } |
878 | if ((!prehash || *prehash) && d_unhashed(dn)) | 878 | if ((!prehash || *prehash) && d_unhashed(dn)) |
879 | d_rehash(dn); | 879 | d_rehash(dn); |
880 | ceph_set_dentry_offset(dn); | 880 | if (set_offset) |
881 | ceph_set_dentry_offset(dn); | ||
881 | out: | 882 | out: |
882 | return dn; | 883 | return dn; |
883 | } | 884 | } |
@@ -1062,7 +1063,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, | |||
1062 | d_delete(dn); | 1063 | d_delete(dn); |
1063 | goto done; | 1064 | goto done; |
1064 | } | 1065 | } |
1065 | dn = splice_dentry(dn, in, &have_lease); | 1066 | dn = splice_dentry(dn, in, &have_lease, true); |
1066 | if (IS_ERR(dn)) { | 1067 | if (IS_ERR(dn)) { |
1067 | err = PTR_ERR(dn); | 1068 | err = PTR_ERR(dn); |
1068 | goto done; | 1069 | goto done; |
@@ -1105,7 +1106,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, | |||
1105 | goto done; | 1106 | goto done; |
1106 | } | 1107 | } |
1107 | dout(" linking snapped dir %p to dn %p\n", in, dn); | 1108 | dout(" linking snapped dir %p to dn %p\n", in, dn); |
1108 | dn = splice_dentry(dn, in, NULL); | 1109 | dn = splice_dentry(dn, in, NULL, true); |
1109 | if (IS_ERR(dn)) { | 1110 | if (IS_ERR(dn)) { |
1110 | err = PTR_ERR(dn); | 1111 | err = PTR_ERR(dn); |
1111 | goto done; | 1112 | goto done; |
@@ -1237,7 +1238,7 @@ retry_lookup: | |||
1237 | err = PTR_ERR(in); | 1238 | err = PTR_ERR(in); |
1238 | goto out; | 1239 | goto out; |
1239 | } | 1240 | } |
1240 | dn = splice_dentry(dn, in, NULL); | 1241 | dn = splice_dentry(dn, in, NULL, false); |
1241 | if (IS_ERR(dn)) | 1242 | if (IS_ERR(dn)) |
1242 | dn = NULL; | 1243 | dn = NULL; |
1243 | } | 1244 | } |
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index f091b1351786..fad95f8f2608 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c | |||
@@ -2374,6 +2374,8 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, | |||
2374 | num_fcntl_locks, | 2374 | num_fcntl_locks, |
2375 | num_flock_locks); | 2375 | num_flock_locks); |
2376 | unlock_kernel(); | 2376 | unlock_kernel(); |
2377 | } else { | ||
2378 | err = ceph_pagelist_append(pagelist, &rec, reclen); | ||
2377 | } | 2379 | } |
2378 | 2380 | ||
2379 | out_free: | 2381 | out_free: |
diff --git a/fs/ceph/pagelist.c b/fs/ceph/pagelist.c index b6859f47d364..46a368b6dce5 100644 --- a/fs/ceph/pagelist.c +++ b/fs/ceph/pagelist.c | |||
@@ -5,10 +5,18 @@ | |||
5 | 5 | ||
6 | #include "pagelist.h" | 6 | #include "pagelist.h" |
7 | 7 | ||
8 | static void ceph_pagelist_unmap_tail(struct ceph_pagelist *pl) | ||
9 | { | ||
10 | struct page *page = list_entry(pl->head.prev, struct page, | ||
11 | lru); | ||
12 | kunmap(page); | ||
13 | } | ||
14 | |||
8 | int ceph_pagelist_release(struct ceph_pagelist *pl) | 15 | int ceph_pagelist_release(struct ceph_pagelist *pl) |
9 | { | 16 | { |
10 | if (pl->mapped_tail) | 17 | if (pl->mapped_tail) |
11 | kunmap(pl->mapped_tail); | 18 | ceph_pagelist_unmap_tail(pl); |
19 | |||
12 | while (!list_empty(&pl->head)) { | 20 | while (!list_empty(&pl->head)) { |
13 | struct page *page = list_first_entry(&pl->head, struct page, | 21 | struct page *page = list_first_entry(&pl->head, struct page, |
14 | lru); | 22 | lru); |
@@ -26,7 +34,7 @@ static int ceph_pagelist_addpage(struct ceph_pagelist *pl) | |||
26 | pl->room += PAGE_SIZE; | 34 | pl->room += PAGE_SIZE; |
27 | list_add_tail(&page->lru, &pl->head); | 35 | list_add_tail(&page->lru, &pl->head); |
28 | if (pl->mapped_tail) | 36 | if (pl->mapped_tail) |
29 | kunmap(pl->mapped_tail); | 37 | ceph_pagelist_unmap_tail(pl); |
30 | pl->mapped_tail = kmap(page); | 38 | pl->mapped_tail = kmap(page); |
31 | return 0; | 39 | return 0; |
32 | } | 40 | } |
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index 4868b9dcac5a..190b6c4a6f2b 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c | |||
@@ -119,6 +119,7 @@ static struct ceph_snap_realm *ceph_create_snap_realm( | |||
119 | INIT_LIST_HEAD(&realm->children); | 119 | INIT_LIST_HEAD(&realm->children); |
120 | INIT_LIST_HEAD(&realm->child_item); | 120 | INIT_LIST_HEAD(&realm->child_item); |
121 | INIT_LIST_HEAD(&realm->empty_item); | 121 | INIT_LIST_HEAD(&realm->empty_item); |
122 | INIT_LIST_HEAD(&realm->dirty_item); | ||
122 | INIT_LIST_HEAD(&realm->inodes_with_caps); | 123 | INIT_LIST_HEAD(&realm->inodes_with_caps); |
123 | spin_lock_init(&realm->inodes_with_caps_lock); | 124 | spin_lock_init(&realm->inodes_with_caps_lock); |
124 | __insert_snap_realm(&mdsc->snap_realms, realm); | 125 | __insert_snap_realm(&mdsc->snap_realms, realm); |
@@ -467,7 +468,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci) | |||
467 | INIT_LIST_HEAD(&capsnap->ci_item); | 468 | INIT_LIST_HEAD(&capsnap->ci_item); |
468 | INIT_LIST_HEAD(&capsnap->flushing_item); | 469 | INIT_LIST_HEAD(&capsnap->flushing_item); |
469 | 470 | ||
470 | capsnap->follows = snapc->seq - 1; | 471 | capsnap->follows = snapc->seq; |
471 | capsnap->issued = __ceph_caps_issued(ci, NULL); | 472 | capsnap->issued = __ceph_caps_issued(ci, NULL); |
472 | capsnap->dirty = dirty; | 473 | capsnap->dirty = dirty; |
473 | 474 | ||
@@ -604,6 +605,7 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc, | |||
604 | struct ceph_snap_realm *realm; | 605 | struct ceph_snap_realm *realm; |
605 | int invalidate = 0; | 606 | int invalidate = 0; |
606 | int err = -ENOMEM; | 607 | int err = -ENOMEM; |
608 | LIST_HEAD(dirty_realms); | ||
607 | 609 | ||
608 | dout("update_snap_trace deletion=%d\n", deletion); | 610 | dout("update_snap_trace deletion=%d\n", deletion); |
609 | more: | 611 | more: |
@@ -626,24 +628,6 @@ more: | |||
626 | } | 628 | } |
627 | } | 629 | } |
628 | 630 | ||
629 | if (le64_to_cpu(ri->seq) > realm->seq) { | ||
630 | dout("update_snap_trace updating %llx %p %lld -> %lld\n", | ||
631 | realm->ino, realm, realm->seq, le64_to_cpu(ri->seq)); | ||
632 | /* | ||
633 | * if the realm seq has changed, queue a cap_snap for every | ||
634 | * inode with open caps. we do this _before_ we update | ||
635 | * the realm info so that we prepare for writeback under the | ||
636 | * _previous_ snap context. | ||
637 | * | ||
638 | * ...unless it's a snap deletion! | ||
639 | */ | ||
640 | if (!deletion) | ||
641 | queue_realm_cap_snaps(realm); | ||
642 | } else { | ||
643 | dout("update_snap_trace %llx %p seq %lld unchanged\n", | ||
644 | realm->ino, realm, realm->seq); | ||
645 | } | ||
646 | |||
647 | /* ensure the parent is correct */ | 631 | /* ensure the parent is correct */ |
648 | err = adjust_snap_realm_parent(mdsc, realm, le64_to_cpu(ri->parent)); | 632 | err = adjust_snap_realm_parent(mdsc, realm, le64_to_cpu(ri->parent)); |
649 | if (err < 0) | 633 | if (err < 0) |
@@ -651,6 +635,8 @@ more: | |||
651 | invalidate += err; | 635 | invalidate += err; |
652 | 636 | ||
653 | if (le64_to_cpu(ri->seq) > realm->seq) { | 637 | if (le64_to_cpu(ri->seq) > realm->seq) { |
638 | dout("update_snap_trace updating %llx %p %lld -> %lld\n", | ||
639 | realm->ino, realm, realm->seq, le64_to_cpu(ri->seq)); | ||
654 | /* update realm parameters, snap lists */ | 640 | /* update realm parameters, snap lists */ |
655 | realm->seq = le64_to_cpu(ri->seq); | 641 | realm->seq = le64_to_cpu(ri->seq); |
656 | realm->created = le64_to_cpu(ri->created); | 642 | realm->created = le64_to_cpu(ri->created); |
@@ -668,9 +654,17 @@ more: | |||
668 | if (err < 0) | 654 | if (err < 0) |
669 | goto fail; | 655 | goto fail; |
670 | 656 | ||
657 | /* queue realm for cap_snap creation */ | ||
658 | list_add(&realm->dirty_item, &dirty_realms); | ||
659 | |||
671 | invalidate = 1; | 660 | invalidate = 1; |
672 | } else if (!realm->cached_context) { | 661 | } else if (!realm->cached_context) { |
662 | dout("update_snap_trace %llx %p seq %lld new\n", | ||
663 | realm->ino, realm, realm->seq); | ||
673 | invalidate = 1; | 664 | invalidate = 1; |
665 | } else { | ||
666 | dout("update_snap_trace %llx %p seq %lld unchanged\n", | ||
667 | realm->ino, realm, realm->seq); | ||
674 | } | 668 | } |
675 | 669 | ||
676 | dout("done with %llx %p, invalidated=%d, %p %p\n", realm->ino, | 670 | dout("done with %llx %p, invalidated=%d, %p %p\n", realm->ino, |
@@ -683,6 +677,14 @@ more: | |||
683 | if (invalidate) | 677 | if (invalidate) |
684 | rebuild_snap_realms(realm); | 678 | rebuild_snap_realms(realm); |
685 | 679 | ||
680 | /* | ||
681 | * queue cap snaps _after_ we've built the new snap contexts, | ||
682 | * so that i_head_snapc can be set appropriately. | ||
683 | */ | ||
684 | list_for_each_entry(realm, &dirty_realms, dirty_item) { | ||
685 | queue_realm_cap_snaps(realm); | ||
686 | } | ||
687 | |||
686 | __cleanup_empty_realms(mdsc); | 688 | __cleanup_empty_realms(mdsc); |
687 | return 0; | 689 | return 0; |
688 | 690 | ||
@@ -715,7 +717,7 @@ static void flush_snaps(struct ceph_mds_client *mdsc) | |||
715 | igrab(inode); | 717 | igrab(inode); |
716 | spin_unlock(&mdsc->snap_flush_lock); | 718 | spin_unlock(&mdsc->snap_flush_lock); |
717 | spin_lock(&inode->i_lock); | 719 | spin_lock(&inode->i_lock); |
718 | __ceph_flush_snaps(ci, &session); | 720 | __ceph_flush_snaps(ci, &session, 0); |
719 | spin_unlock(&inode->i_lock); | 721 | spin_unlock(&inode->i_lock); |
720 | iput(inode); | 722 | iput(inode); |
721 | spin_lock(&mdsc->snap_flush_lock); | 723 | spin_lock(&mdsc->snap_flush_lock); |
@@ -816,6 +818,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc, | |||
816 | }; | 818 | }; |
817 | struct inode *inode = ceph_find_inode(sb, vino); | 819 | struct inode *inode = ceph_find_inode(sb, vino); |
818 | struct ceph_inode_info *ci; | 820 | struct ceph_inode_info *ci; |
821 | struct ceph_snap_realm *oldrealm; | ||
819 | 822 | ||
820 | if (!inode) | 823 | if (!inode) |
821 | continue; | 824 | continue; |
@@ -841,18 +844,19 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc, | |||
841 | dout(" will move %p to split realm %llx %p\n", | 844 | dout(" will move %p to split realm %llx %p\n", |
842 | inode, realm->ino, realm); | 845 | inode, realm->ino, realm); |
843 | /* | 846 | /* |
844 | * Remove the inode from the realm's inode | 847 | * Move the inode to the new realm |
845 | * list, but don't add it to the new realm | ||
846 | * yet. We don't want the cap_snap to be | ||
847 | * queued (again) by ceph_update_snap_trace() | ||
848 | * below. Queue it _now_, under the old context. | ||
849 | */ | 848 | */ |
850 | spin_lock(&realm->inodes_with_caps_lock); | 849 | spin_lock(&realm->inodes_with_caps_lock); |
851 | list_del_init(&ci->i_snap_realm_item); | 850 | list_del_init(&ci->i_snap_realm_item); |
851 | list_add(&ci->i_snap_realm_item, | ||
852 | &realm->inodes_with_caps); | ||
853 | oldrealm = ci->i_snap_realm; | ||
854 | ci->i_snap_realm = realm; | ||
852 | spin_unlock(&realm->inodes_with_caps_lock); | 855 | spin_unlock(&realm->inodes_with_caps_lock); |
853 | spin_unlock(&inode->i_lock); | 856 | spin_unlock(&inode->i_lock); |
854 | 857 | ||
855 | ceph_queue_cap_snap(ci); | 858 | ceph_get_snap_realm(mdsc, realm); |
859 | ceph_put_snap_realm(mdsc, oldrealm); | ||
856 | 860 | ||
857 | iput(inode); | 861 | iput(inode); |
858 | continue; | 862 | continue; |
@@ -880,43 +884,9 @@ skip_inode: | |||
880 | ceph_update_snap_trace(mdsc, p, e, | 884 | ceph_update_snap_trace(mdsc, p, e, |
881 | op == CEPH_SNAP_OP_DESTROY); | 885 | op == CEPH_SNAP_OP_DESTROY); |
882 | 886 | ||
883 | if (op == CEPH_SNAP_OP_SPLIT) { | 887 | if (op == CEPH_SNAP_OP_SPLIT) |
884 | /* | ||
885 | * ok, _now_ add the inodes into the new realm. | ||
886 | */ | ||
887 | for (i = 0; i < num_split_inos; i++) { | ||
888 | struct ceph_vino vino = { | ||
889 | .ino = le64_to_cpu(split_inos[i]), | ||
890 | .snap = CEPH_NOSNAP, | ||
891 | }; | ||
892 | struct inode *inode = ceph_find_inode(sb, vino); | ||
893 | struct ceph_inode_info *ci; | ||
894 | |||
895 | if (!inode) | ||
896 | continue; | ||
897 | ci = ceph_inode(inode); | ||
898 | spin_lock(&inode->i_lock); | ||
899 | if (list_empty(&ci->i_snap_realm_item)) { | ||
900 | struct ceph_snap_realm *oldrealm = | ||
901 | ci->i_snap_realm; | ||
902 | |||
903 | dout(" moving %p to split realm %llx %p\n", | ||
904 | inode, realm->ino, realm); | ||
905 | spin_lock(&realm->inodes_with_caps_lock); | ||
906 | list_add(&ci->i_snap_realm_item, | ||
907 | &realm->inodes_with_caps); | ||
908 | ci->i_snap_realm = realm; | ||
909 | spin_unlock(&realm->inodes_with_caps_lock); | ||
910 | ceph_get_snap_realm(mdsc, realm); | ||
911 | ceph_put_snap_realm(mdsc, oldrealm); | ||
912 | } | ||
913 | spin_unlock(&inode->i_lock); | ||
914 | iput(inode); | ||
915 | } | ||
916 | |||
917 | /* we took a reference when we created the realm, above */ | 888 | /* we took a reference when we created the realm, above */ |
918 | ceph_put_snap_realm(mdsc, realm); | 889 | ceph_put_snap_realm(mdsc, realm); |
919 | } | ||
920 | 890 | ||
921 | __cleanup_empty_realms(mdsc); | 891 | __cleanup_empty_realms(mdsc); |
922 | 892 | ||
diff --git a/fs/ceph/super.h b/fs/ceph/super.h index c33897ae5725..b87638e84c4b 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h | |||
@@ -690,6 +690,8 @@ struct ceph_snap_realm { | |||
690 | 690 | ||
691 | struct list_head empty_item; /* if i have ref==0 */ | 691 | struct list_head empty_item; /* if i have ref==0 */ |
692 | 692 | ||
693 | struct list_head dirty_item; /* if realm needs new context */ | ||
694 | |||
693 | /* the current set of snaps for this realm */ | 695 | /* the current set of snaps for this realm */ |
694 | struct ceph_snap_context *cached_context; | 696 | struct ceph_snap_context *cached_context; |
695 | 697 | ||
@@ -826,7 +828,8 @@ extern void ceph_put_cap_refs(struct ceph_inode_info *ci, int had); | |||
826 | extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, | 828 | extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, |
827 | struct ceph_snap_context *snapc); | 829 | struct ceph_snap_context *snapc); |
828 | extern void __ceph_flush_snaps(struct ceph_inode_info *ci, | 830 | extern void __ceph_flush_snaps(struct ceph_inode_info *ci, |
829 | struct ceph_mds_session **psession); | 831 | struct ceph_mds_session **psession, |
832 | int again); | ||
830 | extern void ceph_check_caps(struct ceph_inode_info *ci, int flags, | 833 | extern void ceph_check_caps(struct ceph_inode_info *ci, int flags, |
831 | struct ceph_mds_session *session); | 834 | struct ceph_mds_session *session); |
832 | extern void ceph_check_delayed_caps(struct ceph_mds_client *mdsc); | 835 | extern void ceph_check_delayed_caps(struct ceph_mds_client *mdsc); |