diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-07-09 15:39:10 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-07-09 15:39:10 -0400 |
commit | 9a5889ae1ce41f376e6a5b56e17e0c5a755fda80 (patch) | |
tree | 0eaadb5530d5b82460e0bfb0b4403e080d7b1b8f /fs | |
parent | e3a0dd98e1ddfd135b7ef889fcc0269e8c2ca445 (diff) | |
parent | 8b8cf8917f9b5d74e04f281272d8719ce335a497 (diff) |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
Pull Ceph updates from Sage Weil:
"There is some follow-on RBD cleanup after the last window's code drop,
a series from Yan fixing multi-mds behavior in cephfs, and then a
sprinkling of bug fixes all around. Some warnings, sleeping while
atomic, a null dereference, and cleanups"
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client: (36 commits)
libceph: fix invalid unsigned->signed conversion for timespec encoding
libceph: call r_unsafe_callback when unsafe reply is received
ceph: fix race between cap issue and revoke
ceph: fix cap revoke race
ceph: fix pending vmtruncate race
ceph: avoid accessing invalid memory
libceph: Fix NULL pointer dereference in auth client code
ceph: Reconstruct the func ceph_reserve_caps.
ceph: Free mdsc if alloc mdsc->mdsmap failed.
ceph: remove sb_start/end_write in ceph_aio_write.
ceph: avoid meaningless calling ceph_caps_revoking if sync_mode == WB_SYNC_ALL.
ceph: fix sleeping function called from invalid context.
ceph: move inode to proper flushing list when auth MDS changes
rbd: fix a couple warnings
ceph: clear migrate seq when MDS restarts
ceph: check migrate seq before changing auth cap
ceph: fix race between page writeback and truncate
ceph: reset iov_len when discarding cap release messages
ceph: fix cap release race
libceph: fix truncate size calculation
...
Diffstat (limited to 'fs')
-rw-r--r-- | fs/ceph/addr.c | 88 | ||||
-rw-r--r-- | fs/ceph/caps.c | 102 | ||||
-rw-r--r-- | fs/ceph/file.c | 4 | ||||
-rw-r--r-- | fs/ceph/inode.c | 14 | ||||
-rw-r--r-- | fs/ceph/locks.c | 2 | ||||
-rw-r--r-- | fs/ceph/mds_client.c | 6 | ||||
-rw-r--r-- | fs/ceph/mdsmap.c | 42 | ||||
-rw-r--r-- | fs/ceph/super.c | 2 | ||||
-rw-r--r-- | fs/ceph/super.h | 4 | ||||
-rw-r--r-- | fs/ceph/xattr.c | 9 |
10 files changed, 149 insertions, 124 deletions
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 38b5c1bc6776..5318a3b704f6 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c | |||
@@ -439,13 +439,12 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) | |||
439 | struct ceph_inode_info *ci; | 439 | struct ceph_inode_info *ci; |
440 | struct ceph_fs_client *fsc; | 440 | struct ceph_fs_client *fsc; |
441 | struct ceph_osd_client *osdc; | 441 | struct ceph_osd_client *osdc; |
442 | loff_t page_off = page_offset(page); | ||
443 | int len = PAGE_CACHE_SIZE; | ||
444 | loff_t i_size; | ||
445 | int err = 0; | ||
446 | struct ceph_snap_context *snapc, *oldest; | 442 | struct ceph_snap_context *snapc, *oldest; |
447 | u64 snap_size = 0; | 443 | loff_t page_off = page_offset(page); |
448 | long writeback_stat; | 444 | long writeback_stat; |
445 | u64 truncate_size, snap_size = 0; | ||
446 | u32 truncate_seq; | ||
447 | int err = 0, len = PAGE_CACHE_SIZE; | ||
449 | 448 | ||
450 | dout("writepage %p idx %lu\n", page, page->index); | 449 | dout("writepage %p idx %lu\n", page, page->index); |
451 | 450 | ||
@@ -475,13 +474,20 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) | |||
475 | } | 474 | } |
476 | ceph_put_snap_context(oldest); | 475 | ceph_put_snap_context(oldest); |
477 | 476 | ||
477 | spin_lock(&ci->i_ceph_lock); | ||
478 | truncate_seq = ci->i_truncate_seq; | ||
479 | truncate_size = ci->i_truncate_size; | ||
480 | if (!snap_size) | ||
481 | snap_size = i_size_read(inode); | ||
482 | spin_unlock(&ci->i_ceph_lock); | ||
483 | |||
478 | /* is this a partial page at end of file? */ | 484 | /* is this a partial page at end of file? */ |
479 | if (snap_size) | 485 | if (page_off >= snap_size) { |
480 | i_size = snap_size; | 486 | dout("%p page eof %llu\n", page, snap_size); |
481 | else | 487 | goto out; |
482 | i_size = i_size_read(inode); | 488 | } |
483 | if (i_size < page_off + len) | 489 | if (snap_size < page_off + len) |
484 | len = i_size - page_off; | 490 | len = snap_size - page_off; |
485 | 491 | ||
486 | dout("writepage %p page %p index %lu on %llu~%u snapc %p\n", | 492 | dout("writepage %p page %p index %lu on %llu~%u snapc %p\n", |
487 | inode, page, page->index, page_off, len, snapc); | 493 | inode, page, page->index, page_off, len, snapc); |
@@ -495,7 +501,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) | |||
495 | err = ceph_osdc_writepages(osdc, ceph_vino(inode), | 501 | err = ceph_osdc_writepages(osdc, ceph_vino(inode), |
496 | &ci->i_layout, snapc, | 502 | &ci->i_layout, snapc, |
497 | page_off, len, | 503 | page_off, len, |
498 | ci->i_truncate_seq, ci->i_truncate_size, | 504 | truncate_seq, truncate_size, |
499 | &inode->i_mtime, &page, 1); | 505 | &inode->i_mtime, &page, 1); |
500 | if (err < 0) { | 506 | if (err < 0) { |
501 | dout("writepage setting page/mapping error %d %p\n", err, page); | 507 | dout("writepage setting page/mapping error %d %p\n", err, page); |
@@ -632,25 +638,6 @@ static void writepages_finish(struct ceph_osd_request *req, | |||
632 | ceph_osdc_put_request(req); | 638 | ceph_osdc_put_request(req); |
633 | } | 639 | } |
634 | 640 | ||
635 | static struct ceph_osd_request * | ||
636 | ceph_writepages_osd_request(struct inode *inode, u64 offset, u64 *len, | ||
637 | struct ceph_snap_context *snapc, int num_ops) | ||
638 | { | ||
639 | struct ceph_fs_client *fsc; | ||
640 | struct ceph_inode_info *ci; | ||
641 | struct ceph_vino vino; | ||
642 | |||
643 | fsc = ceph_inode_to_client(inode); | ||
644 | ci = ceph_inode(inode); | ||
645 | vino = ceph_vino(inode); | ||
646 | /* BUG_ON(vino.snap != CEPH_NOSNAP); */ | ||
647 | |||
648 | return ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, | ||
649 | vino, offset, len, num_ops, CEPH_OSD_OP_WRITE, | ||
650 | CEPH_OSD_FLAG_WRITE|CEPH_OSD_FLAG_ONDISK, | ||
651 | snapc, ci->i_truncate_seq, ci->i_truncate_size, true); | ||
652 | } | ||
653 | |||
654 | /* | 641 | /* |
655 | * initiate async writeback | 642 | * initiate async writeback |
656 | */ | 643 | */ |
@@ -659,7 +646,8 @@ static int ceph_writepages_start(struct address_space *mapping, | |||
659 | { | 646 | { |
660 | struct inode *inode = mapping->host; | 647 | struct inode *inode = mapping->host; |
661 | struct ceph_inode_info *ci = ceph_inode(inode); | 648 | struct ceph_inode_info *ci = ceph_inode(inode); |
662 | struct ceph_fs_client *fsc; | 649 | struct ceph_fs_client *fsc = ceph_inode_to_client(inode); |
650 | struct ceph_vino vino = ceph_vino(inode); | ||
663 | pgoff_t index, start, end; | 651 | pgoff_t index, start, end; |
664 | int range_whole = 0; | 652 | int range_whole = 0; |
665 | int should_loop = 1; | 653 | int should_loop = 1; |
@@ -671,22 +659,22 @@ static int ceph_writepages_start(struct address_space *mapping, | |||
671 | unsigned wsize = 1 << inode->i_blkbits; | 659 | unsigned wsize = 1 << inode->i_blkbits; |
672 | struct ceph_osd_request *req = NULL; | 660 | struct ceph_osd_request *req = NULL; |
673 | int do_sync; | 661 | int do_sync; |
674 | u64 snap_size; | 662 | u64 truncate_size, snap_size; |
663 | u32 truncate_seq; | ||
675 | 664 | ||
676 | /* | 665 | /* |
677 | * Include a 'sync' in the OSD request if this is a data | 666 | * Include a 'sync' in the OSD request if this is a data |
678 | * integrity write (e.g., O_SYNC write or fsync()), or if our | 667 | * integrity write (e.g., O_SYNC write or fsync()), or if our |
679 | * cap is being revoked. | 668 | * cap is being revoked. |
680 | */ | 669 | */ |
681 | do_sync = wbc->sync_mode == WB_SYNC_ALL; | 670 | if ((wbc->sync_mode == WB_SYNC_ALL) || |
682 | if (ceph_caps_revoking(ci, CEPH_CAP_FILE_BUFFER)) | 671 | ceph_caps_revoking(ci, CEPH_CAP_FILE_BUFFER)) |
683 | do_sync = 1; | 672 | do_sync = 1; |
684 | dout("writepages_start %p dosync=%d (mode=%s)\n", | 673 | dout("writepages_start %p dosync=%d (mode=%s)\n", |
685 | inode, do_sync, | 674 | inode, do_sync, |
686 | wbc->sync_mode == WB_SYNC_NONE ? "NONE" : | 675 | wbc->sync_mode == WB_SYNC_NONE ? "NONE" : |
687 | (wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD")); | 676 | (wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD")); |
688 | 677 | ||
689 | fsc = ceph_inode_to_client(inode); | ||
690 | if (fsc->mount_state == CEPH_MOUNT_SHUTDOWN) { | 678 | if (fsc->mount_state == CEPH_MOUNT_SHUTDOWN) { |
691 | pr_warning("writepage_start %p on forced umount\n", inode); | 679 | pr_warning("writepage_start %p on forced umount\n", inode); |
692 | return -EIO; /* we're in a forced umount, don't write! */ | 680 | return -EIO; /* we're in a forced umount, don't write! */ |
@@ -729,6 +717,14 @@ retry: | |||
729 | snap_size = i_size_read(inode); | 717 | snap_size = i_size_read(inode); |
730 | dout(" oldest snapc is %p seq %lld (%d snaps)\n", | 718 | dout(" oldest snapc is %p seq %lld (%d snaps)\n", |
731 | snapc, snapc->seq, snapc->num_snaps); | 719 | snapc, snapc->seq, snapc->num_snaps); |
720 | |||
721 | spin_lock(&ci->i_ceph_lock); | ||
722 | truncate_seq = ci->i_truncate_seq; | ||
723 | truncate_size = ci->i_truncate_size; | ||
724 | if (!snap_size) | ||
725 | snap_size = i_size_read(inode); | ||
726 | spin_unlock(&ci->i_ceph_lock); | ||
727 | |||
732 | if (last_snapc && snapc != last_snapc) { | 728 | if (last_snapc && snapc != last_snapc) { |
733 | /* if we switched to a newer snapc, restart our scan at the | 729 | /* if we switched to a newer snapc, restart our scan at the |
734 | * start of the original file range. */ | 730 | * start of the original file range. */ |
@@ -740,7 +736,6 @@ retry: | |||
740 | 736 | ||
741 | while (!done && index <= end) { | 737 | while (!done && index <= end) { |
742 | int num_ops = do_sync ? 2 : 1; | 738 | int num_ops = do_sync ? 2 : 1; |
743 | struct ceph_vino vino; | ||
744 | unsigned i; | 739 | unsigned i; |
745 | int first; | 740 | int first; |
746 | pgoff_t next; | 741 | pgoff_t next; |
@@ -834,17 +829,18 @@ get_more_pages: | |||
834 | * that it will use. | 829 | * that it will use. |
835 | */ | 830 | */ |
836 | if (locked_pages == 0) { | 831 | if (locked_pages == 0) { |
837 | size_t size; | ||
838 | |||
839 | BUG_ON(pages); | 832 | BUG_ON(pages); |
840 | |||
841 | /* prepare async write request */ | 833 | /* prepare async write request */ |
842 | offset = (u64)page_offset(page); | 834 | offset = (u64)page_offset(page); |
843 | len = wsize; | 835 | len = wsize; |
844 | req = ceph_writepages_osd_request(inode, | 836 | req = ceph_osdc_new_request(&fsc->client->osdc, |
845 | offset, &len, snapc, | 837 | &ci->i_layout, vino, |
846 | num_ops); | 838 | offset, &len, num_ops, |
847 | 839 | CEPH_OSD_OP_WRITE, | |
840 | CEPH_OSD_FLAG_WRITE | | ||
841 | CEPH_OSD_FLAG_ONDISK, | ||
842 | snapc, truncate_seq, | ||
843 | truncate_size, true); | ||
848 | if (IS_ERR(req)) { | 844 | if (IS_ERR(req)) { |
849 | rc = PTR_ERR(req); | 845 | rc = PTR_ERR(req); |
850 | unlock_page(page); | 846 | unlock_page(page); |
@@ -855,8 +851,8 @@ get_more_pages: | |||
855 | req->r_inode = inode; | 851 | req->r_inode = inode; |
856 | 852 | ||
857 | max_pages = calc_pages_for(0, (u64)len); | 853 | max_pages = calc_pages_for(0, (u64)len); |
858 | size = max_pages * sizeof (*pages); | 854 | pages = kmalloc(max_pages * sizeof (*pages), |
859 | pages = kmalloc(size, GFP_NOFS); | 855 | GFP_NOFS); |
860 | if (!pages) { | 856 | if (!pages) { |
861 | pool = fsc->wb_pagevec_pool; | 857 | pool = fsc->wb_pagevec_pool; |
862 | pages = mempool_alloc(pool, GFP_NOFS); | 858 | pages = mempool_alloc(pool, GFP_NOFS); |
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index da0f9b8a3bcb..25442b40c25a 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
@@ -147,7 +147,7 @@ void ceph_adjust_min_caps(struct ceph_mds_client *mdsc, int delta) | |||
147 | spin_unlock(&mdsc->caps_list_lock); | 147 | spin_unlock(&mdsc->caps_list_lock); |
148 | } | 148 | } |
149 | 149 | ||
150 | int ceph_reserve_caps(struct ceph_mds_client *mdsc, | 150 | void ceph_reserve_caps(struct ceph_mds_client *mdsc, |
151 | struct ceph_cap_reservation *ctx, int need) | 151 | struct ceph_cap_reservation *ctx, int need) |
152 | { | 152 | { |
153 | int i; | 153 | int i; |
@@ -155,7 +155,6 @@ int ceph_reserve_caps(struct ceph_mds_client *mdsc, | |||
155 | int have; | 155 | int have; |
156 | int alloc = 0; | 156 | int alloc = 0; |
157 | LIST_HEAD(newcaps); | 157 | LIST_HEAD(newcaps); |
158 | int ret = 0; | ||
159 | 158 | ||
160 | dout("reserve caps ctx=%p need=%d\n", ctx, need); | 159 | dout("reserve caps ctx=%p need=%d\n", ctx, need); |
161 | 160 | ||
@@ -174,14 +173,15 @@ int ceph_reserve_caps(struct ceph_mds_client *mdsc, | |||
174 | 173 | ||
175 | for (i = have; i < need; i++) { | 174 | for (i = have; i < need; i++) { |
176 | cap = kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS); | 175 | cap = kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS); |
177 | if (!cap) { | 176 | if (!cap) |
178 | ret = -ENOMEM; | 177 | break; |
179 | goto out_alloc_count; | ||
180 | } | ||
181 | list_add(&cap->caps_item, &newcaps); | 178 | list_add(&cap->caps_item, &newcaps); |
182 | alloc++; | 179 | alloc++; |
183 | } | 180 | } |
184 | BUG_ON(have + alloc != need); | 181 | /* we didn't manage to reserve as much as we needed */ |
182 | if (have + alloc != need) | ||
183 | pr_warn("reserve caps ctx=%p ENOMEM need=%d got=%d\n", | ||
184 | ctx, need, have + alloc); | ||
185 | 185 | ||
186 | spin_lock(&mdsc->caps_list_lock); | 186 | spin_lock(&mdsc->caps_list_lock); |
187 | mdsc->caps_total_count += alloc; | 187 | mdsc->caps_total_count += alloc; |
@@ -197,13 +197,6 @@ int ceph_reserve_caps(struct ceph_mds_client *mdsc, | |||
197 | dout("reserve caps ctx=%p %d = %d used + %d resv + %d avail\n", | 197 | dout("reserve caps ctx=%p %d = %d used + %d resv + %d avail\n", |
198 | ctx, mdsc->caps_total_count, mdsc->caps_use_count, | 198 | ctx, mdsc->caps_total_count, mdsc->caps_use_count, |
199 | mdsc->caps_reserve_count, mdsc->caps_avail_count); | 199 | mdsc->caps_reserve_count, mdsc->caps_avail_count); |
200 | return 0; | ||
201 | |||
202 | out_alloc_count: | ||
203 | /* we didn't manage to reserve as much as we needed */ | ||
204 | pr_warning("reserve caps ctx=%p ENOMEM need=%d got=%d\n", | ||
205 | ctx, need, have); | ||
206 | return ret; | ||
207 | } | 200 | } |
208 | 201 | ||
209 | int ceph_unreserve_caps(struct ceph_mds_client *mdsc, | 202 | int ceph_unreserve_caps(struct ceph_mds_client *mdsc, |
@@ -612,9 +605,11 @@ retry: | |||
612 | __cap_delay_requeue(mdsc, ci); | 605 | __cap_delay_requeue(mdsc, ci); |
613 | } | 606 | } |
614 | 607 | ||
615 | if (flags & CEPH_CAP_FLAG_AUTH) | 608 | if (flags & CEPH_CAP_FLAG_AUTH) { |
616 | ci->i_auth_cap = cap; | 609 | if (ci->i_auth_cap == NULL || |
617 | else if (ci->i_auth_cap == cap) { | 610 | ceph_seq_cmp(ci->i_auth_cap->mseq, mseq) < 0) |
611 | ci->i_auth_cap = cap; | ||
612 | } else if (ci->i_auth_cap == cap) { | ||
618 | ci->i_auth_cap = NULL; | 613 | ci->i_auth_cap = NULL; |
619 | spin_lock(&mdsc->cap_dirty_lock); | 614 | spin_lock(&mdsc->cap_dirty_lock); |
620 | if (!list_empty(&ci->i_dirty_item)) { | 615 | if (!list_empty(&ci->i_dirty_item)) { |
@@ -695,6 +690,15 @@ int __ceph_caps_issued(struct ceph_inode_info *ci, int *implemented) | |||
695 | if (implemented) | 690 | if (implemented) |
696 | *implemented |= cap->implemented; | 691 | *implemented |= cap->implemented; |
697 | } | 692 | } |
693 | /* | ||
694 | * exclude caps issued by non-auth MDS, but are being revoked | ||
695 | * by the auth MDS. The non-auth MDS should be revoking/exporting | ||
696 | * these caps, but the message is delayed. | ||
697 | */ | ||
698 | if (ci->i_auth_cap) { | ||
699 | cap = ci->i_auth_cap; | ||
700 | have &= ~cap->implemented | cap->issued; | ||
701 | } | ||
698 | return have; | 702 | return have; |
699 | } | 703 | } |
700 | 704 | ||
@@ -802,22 +806,28 @@ int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int touch) | |||
802 | /* | 806 | /* |
803 | * Return true if mask caps are currently being revoked by an MDS. | 807 | * Return true if mask caps are currently being revoked by an MDS. |
804 | */ | 808 | */ |
805 | int ceph_caps_revoking(struct ceph_inode_info *ci, int mask) | 809 | int __ceph_caps_revoking_other(struct ceph_inode_info *ci, |
810 | struct ceph_cap *ocap, int mask) | ||
806 | { | 811 | { |
807 | struct inode *inode = &ci->vfs_inode; | ||
808 | struct ceph_cap *cap; | 812 | struct ceph_cap *cap; |
809 | struct rb_node *p; | 813 | struct rb_node *p; |
810 | int ret = 0; | ||
811 | 814 | ||
812 | spin_lock(&ci->i_ceph_lock); | ||
813 | for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) { | 815 | for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) { |
814 | cap = rb_entry(p, struct ceph_cap, ci_node); | 816 | cap = rb_entry(p, struct ceph_cap, ci_node); |
815 | if (__cap_is_valid(cap) && | 817 | if (cap != ocap && __cap_is_valid(cap) && |
816 | (cap->implemented & ~cap->issued & mask)) { | 818 | (cap->implemented & ~cap->issued & mask)) |
817 | ret = 1; | 819 | return 1; |
818 | break; | ||
819 | } | ||
820 | } | 820 | } |
821 | return 0; | ||
822 | } | ||
823 | |||
824 | int ceph_caps_revoking(struct ceph_inode_info *ci, int mask) | ||
825 | { | ||
826 | struct inode *inode = &ci->vfs_inode; | ||
827 | int ret; | ||
828 | |||
829 | spin_lock(&ci->i_ceph_lock); | ||
830 | ret = __ceph_caps_revoking_other(ci, NULL, mask); | ||
821 | spin_unlock(&ci->i_ceph_lock); | 831 | spin_unlock(&ci->i_ceph_lock); |
822 | dout("ceph_caps_revoking %p %s = %d\n", inode, | 832 | dout("ceph_caps_revoking %p %s = %d\n", inode, |
823 | ceph_cap_string(mask), ret); | 833 | ceph_cap_string(mask), ret); |
@@ -1980,8 +1990,15 @@ static void kick_flushing_inode_caps(struct ceph_mds_client *mdsc, | |||
1980 | cap = ci->i_auth_cap; | 1990 | cap = ci->i_auth_cap; |
1981 | dout("kick_flushing_inode_caps %p flushing %s flush_seq %lld\n", inode, | 1991 | dout("kick_flushing_inode_caps %p flushing %s flush_seq %lld\n", inode, |
1982 | ceph_cap_string(ci->i_flushing_caps), ci->i_cap_flush_seq); | 1992 | ceph_cap_string(ci->i_flushing_caps), ci->i_cap_flush_seq); |
1993 | |||
1983 | __ceph_flush_snaps(ci, &session, 1); | 1994 | __ceph_flush_snaps(ci, &session, 1); |
1995 | |||
1984 | if (ci->i_flushing_caps) { | 1996 | if (ci->i_flushing_caps) { |
1997 | spin_lock(&mdsc->cap_dirty_lock); | ||
1998 | list_move_tail(&ci->i_flushing_item, | ||
1999 | &cap->session->s_cap_flushing); | ||
2000 | spin_unlock(&mdsc->cap_dirty_lock); | ||
2001 | |||
1985 | delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH, | 2002 | delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH, |
1986 | __ceph_caps_used(ci), | 2003 | __ceph_caps_used(ci), |
1987 | __ceph_caps_wanted(ci), | 2004 | __ceph_caps_wanted(ci), |
@@ -2055,7 +2072,11 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, | |||
2055 | /* finish pending truncate */ | 2072 | /* finish pending truncate */ |
2056 | while (ci->i_truncate_pending) { | 2073 | while (ci->i_truncate_pending) { |
2057 | spin_unlock(&ci->i_ceph_lock); | 2074 | spin_unlock(&ci->i_ceph_lock); |
2058 | __ceph_do_pending_vmtruncate(inode, !(need & CEPH_CAP_FILE_WR)); | 2075 | if (!(need & CEPH_CAP_FILE_WR)) |
2076 | mutex_lock(&inode->i_mutex); | ||
2077 | __ceph_do_pending_vmtruncate(inode); | ||
2078 | if (!(need & CEPH_CAP_FILE_WR)) | ||
2079 | mutex_unlock(&inode->i_mutex); | ||
2059 | spin_lock(&ci->i_ceph_lock); | 2080 | spin_lock(&ci->i_ceph_lock); |
2060 | } | 2081 | } |
2061 | 2082 | ||
@@ -2473,6 +2494,11 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, | |||
2473 | } else { | 2494 | } else { |
2474 | dout("grant: %s -> %s\n", ceph_cap_string(cap->issued), | 2495 | dout("grant: %s -> %s\n", ceph_cap_string(cap->issued), |
2475 | ceph_cap_string(newcaps)); | 2496 | ceph_cap_string(newcaps)); |
2497 | /* non-auth MDS is revoking the newly granted caps? */ | ||
2498 | if (cap == ci->i_auth_cap && | ||
2499 | __ceph_caps_revoking_other(ci, cap, newcaps)) | ||
2500 | check_caps = 2; | ||
2501 | |||
2476 | cap->issued = newcaps; | 2502 | cap->issued = newcaps; |
2477 | cap->implemented |= newcaps; /* add bits only, to | 2503 | cap->implemented |= newcaps; /* add bits only, to |
2478 | * avoid stepping on a | 2504 | * avoid stepping on a |
@@ -3042,21 +3068,19 @@ int ceph_encode_inode_release(void **p, struct inode *inode, | |||
3042 | (cap->issued & unless) == 0)) { | 3068 | (cap->issued & unless) == 0)) { |
3043 | if ((cap->issued & drop) && | 3069 | if ((cap->issued & drop) && |
3044 | (cap->issued & unless) == 0) { | 3070 | (cap->issued & unless) == 0) { |
3045 | dout("encode_inode_release %p cap %p %s -> " | 3071 | int wanted = __ceph_caps_wanted(ci); |
3046 | "%s\n", inode, cap, | 3072 | if ((ci->i_ceph_flags & CEPH_I_NODELAY) == 0) |
3073 | wanted |= cap->mds_wanted; | ||
3074 | dout("encode_inode_release %p cap %p " | ||
3075 | "%s -> %s, wanted %s -> %s\n", inode, cap, | ||
3047 | ceph_cap_string(cap->issued), | 3076 | ceph_cap_string(cap->issued), |
3048 | ceph_cap_string(cap->issued & ~drop)); | 3077 | ceph_cap_string(cap->issued & ~drop), |
3078 | ceph_cap_string(cap->mds_wanted), | ||
3079 | ceph_cap_string(wanted)); | ||
3080 | |||
3049 | cap->issued &= ~drop; | 3081 | cap->issued &= ~drop; |
3050 | cap->implemented &= ~drop; | 3082 | cap->implemented &= ~drop; |
3051 | if (ci->i_ceph_flags & CEPH_I_NODELAY) { | 3083 | cap->mds_wanted = wanted; |
3052 | int wanted = __ceph_caps_wanted(ci); | ||
3053 | dout(" wanted %s -> %s (act %s)\n", | ||
3054 | ceph_cap_string(cap->mds_wanted), | ||
3055 | ceph_cap_string(cap->mds_wanted & | ||
3056 | ~wanted), | ||
3057 | ceph_cap_string(wanted)); | ||
3058 | cap->mds_wanted &= wanted; | ||
3059 | } | ||
3060 | } else { | 3084 | } else { |
3061 | dout("encode_inode_release %p cap %p %s" | 3085 | dout("encode_inode_release %p cap %p %s" |
3062 | " (force)\n", inode, cap, | 3086 | " (force)\n", inode, cap, |
diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 16c989d3e23c..2ddf061c1c4a 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c | |||
@@ -716,7 +716,6 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
716 | if (ceph_snap(inode) != CEPH_NOSNAP) | 716 | if (ceph_snap(inode) != CEPH_NOSNAP) |
717 | return -EROFS; | 717 | return -EROFS; |
718 | 718 | ||
719 | sb_start_write(inode->i_sb); | ||
720 | mutex_lock(&inode->i_mutex); | 719 | mutex_lock(&inode->i_mutex); |
721 | hold_mutex = true; | 720 | hold_mutex = true; |
722 | 721 | ||
@@ -809,7 +808,6 @@ retry_snap: | |||
809 | out: | 808 | out: |
810 | if (hold_mutex) | 809 | if (hold_mutex) |
811 | mutex_unlock(&inode->i_mutex); | 810 | mutex_unlock(&inode->i_mutex); |
812 | sb_end_write(inode->i_sb); | ||
813 | current->backing_dev_info = NULL; | 811 | current->backing_dev_info = NULL; |
814 | 812 | ||
815 | return written ? written : err; | 813 | return written ? written : err; |
@@ -824,7 +822,7 @@ static loff_t ceph_llseek(struct file *file, loff_t offset, int whence) | |||
824 | int ret; | 822 | int ret; |
825 | 823 | ||
826 | mutex_lock(&inode->i_mutex); | 824 | mutex_lock(&inode->i_mutex); |
827 | __ceph_do_pending_vmtruncate(inode, false); | 825 | __ceph_do_pending_vmtruncate(inode); |
828 | 826 | ||
829 | if (whence == SEEK_END || whence == SEEK_DATA || whence == SEEK_HOLE) { | 827 | if (whence == SEEK_END || whence == SEEK_DATA || whence == SEEK_HOLE) { |
830 | ret = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE); | 828 | ret = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE); |
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index bd2289a4f40d..f3a2abf28a77 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c | |||
@@ -1465,7 +1465,9 @@ static void ceph_vmtruncate_work(struct work_struct *work) | |||
1465 | struct inode *inode = &ci->vfs_inode; | 1465 | struct inode *inode = &ci->vfs_inode; |
1466 | 1466 | ||
1467 | dout("vmtruncate_work %p\n", inode); | 1467 | dout("vmtruncate_work %p\n", inode); |
1468 | __ceph_do_pending_vmtruncate(inode, true); | 1468 | mutex_lock(&inode->i_mutex); |
1469 | __ceph_do_pending_vmtruncate(inode); | ||
1470 | mutex_unlock(&inode->i_mutex); | ||
1469 | iput(inode); | 1471 | iput(inode); |
1470 | } | 1472 | } |
1471 | 1473 | ||
@@ -1492,7 +1494,7 @@ void ceph_queue_vmtruncate(struct inode *inode) | |||
1492 | * Make sure any pending truncation is applied before doing anything | 1494 | * Make sure any pending truncation is applied before doing anything |
1493 | * that may depend on it. | 1495 | * that may depend on it. |
1494 | */ | 1496 | */ |
1495 | void __ceph_do_pending_vmtruncate(struct inode *inode, bool needlock) | 1497 | void __ceph_do_pending_vmtruncate(struct inode *inode) |
1496 | { | 1498 | { |
1497 | struct ceph_inode_info *ci = ceph_inode(inode); | 1499 | struct ceph_inode_info *ci = ceph_inode(inode); |
1498 | u64 to; | 1500 | u64 to; |
@@ -1525,11 +1527,7 @@ retry: | |||
1525 | ci->i_truncate_pending, to); | 1527 | ci->i_truncate_pending, to); |
1526 | spin_unlock(&ci->i_ceph_lock); | 1528 | spin_unlock(&ci->i_ceph_lock); |
1527 | 1529 | ||
1528 | if (needlock) | ||
1529 | mutex_lock(&inode->i_mutex); | ||
1530 | truncate_inode_pages(inode->i_mapping, to); | 1530 | truncate_inode_pages(inode->i_mapping, to); |
1531 | if (needlock) | ||
1532 | mutex_unlock(&inode->i_mutex); | ||
1533 | 1531 | ||
1534 | spin_lock(&ci->i_ceph_lock); | 1532 | spin_lock(&ci->i_ceph_lock); |
1535 | if (to == ci->i_truncate_size) { | 1533 | if (to == ci->i_truncate_size) { |
@@ -1588,7 +1586,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr) | |||
1588 | if (ceph_snap(inode) != CEPH_NOSNAP) | 1586 | if (ceph_snap(inode) != CEPH_NOSNAP) |
1589 | return -EROFS; | 1587 | return -EROFS; |
1590 | 1588 | ||
1591 | __ceph_do_pending_vmtruncate(inode, false); | 1589 | __ceph_do_pending_vmtruncate(inode); |
1592 | 1590 | ||
1593 | err = inode_change_ok(inode, attr); | 1591 | err = inode_change_ok(inode, attr); |
1594 | if (err != 0) | 1592 | if (err != 0) |
@@ -1770,7 +1768,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr) | |||
1770 | ceph_cap_string(dirtied), mask); | 1768 | ceph_cap_string(dirtied), mask); |
1771 | 1769 | ||
1772 | ceph_mdsc_put_request(req); | 1770 | ceph_mdsc_put_request(req); |
1773 | __ceph_do_pending_vmtruncate(inode, false); | 1771 | __ceph_do_pending_vmtruncate(inode); |
1774 | return err; | 1772 | return err; |
1775 | out: | 1773 | out: |
1776 | spin_unlock(&ci->i_ceph_lock); | 1774 | spin_unlock(&ci->i_ceph_lock); |
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index 690f73f42425..ae6d14e82b0f 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c | |||
@@ -169,7 +169,7 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl) | |||
169 | } | 169 | } |
170 | 170 | ||
171 | /** | 171 | /** |
172 | * Must be called with BKL already held. Fills in the passed | 172 | * Must be called with lock_flocks() already held. Fills in the passed |
173 | * counter variables, so you can prepare pagelist metadata before calling | 173 | * counter variables, so you can prepare pagelist metadata before calling |
174 | * ceph_encode_locks. | 174 | * ceph_encode_locks. |
175 | */ | 175 | */ |
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 99890b02a10b..187bf214444d 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c | |||
@@ -1391,6 +1391,7 @@ static void discard_cap_releases(struct ceph_mds_client *mdsc, | |||
1391 | num = le32_to_cpu(head->num); | 1391 | num = le32_to_cpu(head->num); |
1392 | dout("discard_cap_releases mds%d %p %u\n", session->s_mds, msg, num); | 1392 | dout("discard_cap_releases mds%d %p %u\n", session->s_mds, msg, num); |
1393 | head->num = cpu_to_le32(0); | 1393 | head->num = cpu_to_le32(0); |
1394 | msg->front.iov_len = sizeof(*head); | ||
1394 | session->s_num_cap_releases += num; | 1395 | session->s_num_cap_releases += num; |
1395 | 1396 | ||
1396 | /* requeue completed messages */ | 1397 | /* requeue completed messages */ |
@@ -2454,6 +2455,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, | |||
2454 | spin_lock(&ci->i_ceph_lock); | 2455 | spin_lock(&ci->i_ceph_lock); |
2455 | cap->seq = 0; /* reset cap seq */ | 2456 | cap->seq = 0; /* reset cap seq */ |
2456 | cap->issue_seq = 0; /* and issue_seq */ | 2457 | cap->issue_seq = 0; /* and issue_seq */ |
2458 | cap->mseq = 0; /* and migrate_seq */ | ||
2457 | 2459 | ||
2458 | if (recon_state->flock) { | 2460 | if (recon_state->flock) { |
2459 | rec.v2.cap_id = cpu_to_le64(cap->cap_id); | 2461 | rec.v2.cap_id = cpu_to_le64(cap->cap_id); |
@@ -3040,8 +3042,10 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc) | |||
3040 | fsc->mdsc = mdsc; | 3042 | fsc->mdsc = mdsc; |
3041 | mutex_init(&mdsc->mutex); | 3043 | mutex_init(&mdsc->mutex); |
3042 | mdsc->mdsmap = kzalloc(sizeof(*mdsc->mdsmap), GFP_NOFS); | 3044 | mdsc->mdsmap = kzalloc(sizeof(*mdsc->mdsmap), GFP_NOFS); |
3043 | if (mdsc->mdsmap == NULL) | 3045 | if (mdsc->mdsmap == NULL) { |
3046 | kfree(mdsc); | ||
3044 | return -ENOMEM; | 3047 | return -ENOMEM; |
3048 | } | ||
3045 | 3049 | ||
3046 | init_completion(&mdsc->safe_umount_waiters); | 3050 | init_completion(&mdsc->safe_umount_waiters); |
3047 | init_waitqueue_head(&mdsc->session_close_wq); | 3051 | init_waitqueue_head(&mdsc->session_close_wq); |
diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c index 9278dec9e940..132b64eeecd4 100644 --- a/fs/ceph/mdsmap.c +++ b/fs/ceph/mdsmap.c | |||
@@ -92,6 +92,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) | |||
92 | u32 num_export_targets; | 92 | u32 num_export_targets; |
93 | void *pexport_targets = NULL; | 93 | void *pexport_targets = NULL; |
94 | struct ceph_timespec laggy_since; | 94 | struct ceph_timespec laggy_since; |
95 | struct ceph_mds_info *info; | ||
95 | 96 | ||
96 | ceph_decode_need(p, end, sizeof(u64)*2 + 1 + sizeof(u32), bad); | 97 | ceph_decode_need(p, end, sizeof(u64)*2 + 1 + sizeof(u32), bad); |
97 | global_id = ceph_decode_64(p); | 98 | global_id = ceph_decode_64(p); |
@@ -126,24 +127,27 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) | |||
126 | i+1, n, global_id, mds, inc, | 127 | i+1, n, global_id, mds, inc, |
127 | ceph_pr_addr(&addr.in_addr), | 128 | ceph_pr_addr(&addr.in_addr), |
128 | ceph_mds_state_name(state)); | 129 | ceph_mds_state_name(state)); |
129 | if (mds >= 0 && mds < m->m_max_mds && state > 0) { | 130 | |
130 | m->m_info[mds].global_id = global_id; | 131 | if (mds < 0 || mds >= m->m_max_mds || state <= 0) |
131 | m->m_info[mds].state = state; | 132 | continue; |
132 | m->m_info[mds].addr = addr; | 133 | |
133 | m->m_info[mds].laggy = | 134 | info = &m->m_info[mds]; |
134 | (laggy_since.tv_sec != 0 || | 135 | info->global_id = global_id; |
135 | laggy_since.tv_nsec != 0); | 136 | info->state = state; |
136 | m->m_info[mds].num_export_targets = num_export_targets; | 137 | info->addr = addr; |
137 | if (num_export_targets) { | 138 | info->laggy = (laggy_since.tv_sec != 0 || |
138 | m->m_info[mds].export_targets = | 139 | laggy_since.tv_nsec != 0); |
139 | kcalloc(num_export_targets, sizeof(u32), | 140 | info->num_export_targets = num_export_targets; |
140 | GFP_NOFS); | 141 | if (num_export_targets) { |
141 | for (j = 0; j < num_export_targets; j++) | 142 | info->export_targets = kcalloc(num_export_targets, |
142 | m->m_info[mds].export_targets[j] = | 143 | sizeof(u32), GFP_NOFS); |
143 | ceph_decode_32(&pexport_targets); | 144 | if (info->export_targets == NULL) |
144 | } else { | 145 | goto badmem; |
145 | m->m_info[mds].export_targets = NULL; | 146 | for (j = 0; j < num_export_targets; j++) |
146 | } | 147 | info->export_targets[j] = |
148 | ceph_decode_32(&pexport_targets); | ||
149 | } else { | ||
150 | info->export_targets = NULL; | ||
147 | } | 151 | } |
148 | } | 152 | } |
149 | 153 | ||
@@ -170,7 +174,7 @@ bad: | |||
170 | DUMP_PREFIX_OFFSET, 16, 1, | 174 | DUMP_PREFIX_OFFSET, 16, 1, |
171 | start, end - start, true); | 175 | start, end - start, true); |
172 | ceph_mdsmap_destroy(m); | 176 | ceph_mdsmap_destroy(m); |
173 | return ERR_PTR(-EINVAL); | 177 | return ERR_PTR(err); |
174 | } | 178 | } |
175 | 179 | ||
176 | void ceph_mdsmap_destroy(struct ceph_mdsmap *m) | 180 | void ceph_mdsmap_destroy(struct ceph_mdsmap *m) |
diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 7d377c9a5e35..6627b26a800c 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c | |||
@@ -357,7 +357,7 @@ static int parse_mount_options(struct ceph_mount_options **pfsopt, | |||
357 | } | 357 | } |
358 | err = -EINVAL; | 358 | err = -EINVAL; |
359 | dev_name_end--; /* back up to ':' separator */ | 359 | dev_name_end--; /* back up to ':' separator */ |
360 | if (*dev_name_end != ':') { | 360 | if (dev_name_end < dev_name || *dev_name_end != ':') { |
361 | pr_err("device name is missing path (no : separator in %s)\n", | 361 | pr_err("device name is missing path (no : separator in %s)\n", |
362 | dev_name); | 362 | dev_name); |
363 | goto out; | 363 | goto out; |
diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 7ccfdb4aea2e..cbded572345e 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h | |||
@@ -534,7 +534,7 @@ extern int __ceph_caps_mds_wanted(struct ceph_inode_info *ci); | |||
534 | extern void ceph_caps_init(struct ceph_mds_client *mdsc); | 534 | extern void ceph_caps_init(struct ceph_mds_client *mdsc); |
535 | extern void ceph_caps_finalize(struct ceph_mds_client *mdsc); | 535 | extern void ceph_caps_finalize(struct ceph_mds_client *mdsc); |
536 | extern void ceph_adjust_min_caps(struct ceph_mds_client *mdsc, int delta); | 536 | extern void ceph_adjust_min_caps(struct ceph_mds_client *mdsc, int delta); |
537 | extern int ceph_reserve_caps(struct ceph_mds_client *mdsc, | 537 | extern void ceph_reserve_caps(struct ceph_mds_client *mdsc, |
538 | struct ceph_cap_reservation *ctx, int need); | 538 | struct ceph_cap_reservation *ctx, int need); |
539 | extern int ceph_unreserve_caps(struct ceph_mds_client *mdsc, | 539 | extern int ceph_unreserve_caps(struct ceph_mds_client *mdsc, |
540 | struct ceph_cap_reservation *ctx); | 540 | struct ceph_cap_reservation *ctx); |
@@ -692,7 +692,7 @@ extern int ceph_readdir_prepopulate(struct ceph_mds_request *req, | |||
692 | extern int ceph_inode_holds_cap(struct inode *inode, int mask); | 692 | extern int ceph_inode_holds_cap(struct inode *inode, int mask); |
693 | 693 | ||
694 | extern int ceph_inode_set_size(struct inode *inode, loff_t size); | 694 | extern int ceph_inode_set_size(struct inode *inode, loff_t size); |
695 | extern void __ceph_do_pending_vmtruncate(struct inode *inode, bool needlock); | 695 | extern void __ceph_do_pending_vmtruncate(struct inode *inode); |
696 | extern void ceph_queue_vmtruncate(struct inode *inode); | 696 | extern void ceph_queue_vmtruncate(struct inode *inode); |
697 | 697 | ||
698 | extern void ceph_queue_invalidate(struct inode *inode); | 698 | extern void ceph_queue_invalidate(struct inode *inode); |
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 9b6b2b6dd164..be661d8f532a 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c | |||
@@ -675,17 +675,18 @@ ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value, | |||
675 | if (!ceph_is_valid_xattr(name)) | 675 | if (!ceph_is_valid_xattr(name)) |
676 | return -ENODATA; | 676 | return -ENODATA; |
677 | 677 | ||
678 | spin_lock(&ci->i_ceph_lock); | ||
679 | dout("getxattr %p ver=%lld index_ver=%lld\n", inode, | ||
680 | ci->i_xattrs.version, ci->i_xattrs.index_version); | ||
681 | 678 | ||
682 | /* let's see if a virtual xattr was requested */ | 679 | /* let's see if a virtual xattr was requested */ |
683 | vxattr = ceph_match_vxattr(inode, name); | 680 | vxattr = ceph_match_vxattr(inode, name); |
684 | if (vxattr && !(vxattr->exists_cb && !vxattr->exists_cb(ci))) { | 681 | if (vxattr && !(vxattr->exists_cb && !vxattr->exists_cb(ci))) { |
685 | err = vxattr->getxattr_cb(ci, value, size); | 682 | err = vxattr->getxattr_cb(ci, value, size); |
686 | goto out; | 683 | return err; |
687 | } | 684 | } |
688 | 685 | ||
686 | spin_lock(&ci->i_ceph_lock); | ||
687 | dout("getxattr %p ver=%lld index_ver=%lld\n", inode, | ||
688 | ci->i_xattrs.version, ci->i_xattrs.index_version); | ||
689 | |||
689 | if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1) && | 690 | if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1) && |
690 | (ci->i_xattrs.index_version >= ci->i_xattrs.version)) { | 691 | (ci->i_xattrs.index_version >= ci->i_xattrs.version)) { |
691 | goto get_xattr; | 692 | goto get_xattr; |