diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2010-05-12 21:47:29 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2010-05-12 21:47:29 -0400 |
commit | cdf5f61ed1d64d50eb9cf10071ab40836f5f9f91 (patch) | |
tree | d67166525b89055b44ecf52cd9ae395a676877f5 /fs | |
parent | 769d9968e42c995eaaf61ac5583d998f32e0769a (diff) | |
parent | e84346b726ea90a8ed470bc81c4136a7b8710ea5 (diff) |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client:
ceph: preserve seq # on requeued messages after transient transport errors
ceph: fix cap removal races
ceph: zero unused message header, footer fields
ceph: fix locking for waking session requests after reconnect
ceph: resubmit requests on pg mapping change (not just primary change)
ceph: fix open file counting on snapped inodes when mds returns no caps
ceph: unregister osd request on failure
ceph: don't use writeback_control in writepages completion
ceph: unregister bdi before kill_anon_super releases device name
Diffstat (limited to 'fs')
-rw-r--r-- | fs/ceph/addr.c | 6 | ||||
-rw-r--r-- | fs/ceph/caps.c | 19 | ||||
-rw-r--r-- | fs/ceph/inode.c | 4 | ||||
-rw-r--r-- | fs/ceph/mds_client.c | 34 | ||||
-rw-r--r-- | fs/ceph/messenger.c | 17 | ||||
-rw-r--r-- | fs/ceph/messenger.h | 1 | ||||
-rw-r--r-- | fs/ceph/osd_client.c | 26 | ||||
-rw-r--r-- | fs/ceph/osd_client.h | 3 | ||||
-rw-r--r-- | fs/ceph/osdmap.c | 29 | ||||
-rw-r--r-- | fs/ceph/osdmap.h | 2 | ||||
-rw-r--r-- | fs/ceph/rados.h | 1 | ||||
-rw-r--r-- | fs/ceph/super.c | 23 |
12 files changed, 116 insertions, 49 deletions
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 4b42c2bb603f..a9005d862ed4 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c | |||
@@ -504,7 +504,6 @@ static void writepages_finish(struct ceph_osd_request *req, | |||
504 | int i; | 504 | int i; |
505 | struct ceph_snap_context *snapc = req->r_snapc; | 505 | struct ceph_snap_context *snapc = req->r_snapc; |
506 | struct address_space *mapping = inode->i_mapping; | 506 | struct address_space *mapping = inode->i_mapping; |
507 | struct writeback_control *wbc = req->r_wbc; | ||
508 | __s32 rc = -EIO; | 507 | __s32 rc = -EIO; |
509 | u64 bytes = 0; | 508 | u64 bytes = 0; |
510 | struct ceph_client *client = ceph_inode_to_client(inode); | 509 | struct ceph_client *client = ceph_inode_to_client(inode); |
@@ -546,10 +545,6 @@ static void writepages_finish(struct ceph_osd_request *req, | |||
546 | clear_bdi_congested(&client->backing_dev_info, | 545 | clear_bdi_congested(&client->backing_dev_info, |
547 | BLK_RW_ASYNC); | 546 | BLK_RW_ASYNC); |
548 | 547 | ||
549 | if (i >= wrote) { | ||
550 | dout("inode %p skipping page %p\n", inode, page); | ||
551 | wbc->pages_skipped++; | ||
552 | } | ||
553 | ceph_put_snap_context((void *)page->private); | 548 | ceph_put_snap_context((void *)page->private); |
554 | page->private = 0; | 549 | page->private = 0; |
555 | ClearPagePrivate(page); | 550 | ClearPagePrivate(page); |
@@ -799,7 +794,6 @@ get_more_pages: | |||
799 | alloc_page_vec(client, req); | 794 | alloc_page_vec(client, req); |
800 | req->r_callback = writepages_finish; | 795 | req->r_callback = writepages_finish; |
801 | req->r_inode = inode; | 796 | req->r_inode = inode; |
802 | req->r_wbc = wbc; | ||
803 | } | 797 | } |
804 | 798 | ||
805 | /* note position of first page in pvec */ | 799 | /* note position of first page in pvec */ |
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 0c1681806867..d9400534b279 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
@@ -858,6 +858,8 @@ static int __ceph_is_any_caps(struct ceph_inode_info *ci) | |||
858 | } | 858 | } |
859 | 859 | ||
860 | /* | 860 | /* |
861 | * Remove a cap. Take steps to deal with a racing iterate_session_caps. | ||
862 | * | ||
861 | * caller should hold i_lock. | 863 | * caller should hold i_lock. |
862 | * caller will not hold session s_mutex if called from destroy_inode. | 864 | * caller will not hold session s_mutex if called from destroy_inode. |
863 | */ | 865 | */ |
@@ -866,15 +868,10 @@ void __ceph_remove_cap(struct ceph_cap *cap) | |||
866 | struct ceph_mds_session *session = cap->session; | 868 | struct ceph_mds_session *session = cap->session; |
867 | struct ceph_inode_info *ci = cap->ci; | 869 | struct ceph_inode_info *ci = cap->ci; |
868 | struct ceph_mds_client *mdsc = &ceph_client(ci->vfs_inode.i_sb)->mdsc; | 870 | struct ceph_mds_client *mdsc = &ceph_client(ci->vfs_inode.i_sb)->mdsc; |
871 | int removed = 0; | ||
869 | 872 | ||
870 | dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode); | 873 | dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode); |
871 | 874 | ||
872 | /* remove from inode list */ | ||
873 | rb_erase(&cap->ci_node, &ci->i_caps); | ||
874 | cap->ci = NULL; | ||
875 | if (ci->i_auth_cap == cap) | ||
876 | ci->i_auth_cap = NULL; | ||
877 | |||
878 | /* remove from session list */ | 875 | /* remove from session list */ |
879 | spin_lock(&session->s_cap_lock); | 876 | spin_lock(&session->s_cap_lock); |
880 | if (session->s_cap_iterator == cap) { | 877 | if (session->s_cap_iterator == cap) { |
@@ -885,10 +882,18 @@ void __ceph_remove_cap(struct ceph_cap *cap) | |||
885 | list_del_init(&cap->session_caps); | 882 | list_del_init(&cap->session_caps); |
886 | session->s_nr_caps--; | 883 | session->s_nr_caps--; |
887 | cap->session = NULL; | 884 | cap->session = NULL; |
885 | removed = 1; | ||
888 | } | 886 | } |
887 | /* protect backpointer with s_cap_lock: see iterate_session_caps */ | ||
888 | cap->ci = NULL; | ||
889 | spin_unlock(&session->s_cap_lock); | 889 | spin_unlock(&session->s_cap_lock); |
890 | 890 | ||
891 | if (cap->session == NULL) | 891 | /* remove from inode list */ |
892 | rb_erase(&cap->ci_node, &ci->i_caps); | ||
893 | if (ci->i_auth_cap == cap) | ||
894 | ci->i_auth_cap = NULL; | ||
895 | |||
896 | if (removed) | ||
892 | ceph_put_cap(cap); | 897 | ceph_put_cap(cap); |
893 | 898 | ||
894 | if (!__ceph_is_any_caps(ci) && ci->i_snap_realm) { | 899 | if (!__ceph_is_any_caps(ci) && ci->i_snap_realm) { |
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 261f3e6c0bcf..85b4d2ffdeba 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c | |||
@@ -733,6 +733,10 @@ no_change: | |||
733 | __ceph_get_fmode(ci, cap_fmode); | 733 | __ceph_get_fmode(ci, cap_fmode); |
734 | spin_unlock(&inode->i_lock); | 734 | spin_unlock(&inode->i_lock); |
735 | } | 735 | } |
736 | } else if (cap_fmode >= 0) { | ||
737 | pr_warning("mds issued no caps on %llx.%llx\n", | ||
738 | ceph_vinop(inode)); | ||
739 | __ceph_get_fmode(ci, cap_fmode); | ||
736 | } | 740 | } |
737 | 741 | ||
738 | /* update delegation info? */ | 742 | /* update delegation info? */ |
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 60a9a4ae47be..24561a557e01 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c | |||
@@ -736,9 +736,10 @@ static void cleanup_cap_releases(struct ceph_mds_session *session) | |||
736 | } | 736 | } |
737 | 737 | ||
738 | /* | 738 | /* |
739 | * Helper to safely iterate over all caps associated with a session. | 739 | * Helper to safely iterate over all caps associated with a session, with |
740 | * special care taken to handle a racing __ceph_remove_cap(). | ||
740 | * | 741 | * |
741 | * caller must hold session s_mutex | 742 | * Caller must hold session s_mutex. |
742 | */ | 743 | */ |
743 | static int iterate_session_caps(struct ceph_mds_session *session, | 744 | static int iterate_session_caps(struct ceph_mds_session *session, |
744 | int (*cb)(struct inode *, struct ceph_cap *, | 745 | int (*cb)(struct inode *, struct ceph_cap *, |
@@ -2136,7 +2137,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, int mds) | |||
2136 | struct ceph_mds_session *session = NULL; | 2137 | struct ceph_mds_session *session = NULL; |
2137 | struct ceph_msg *reply; | 2138 | struct ceph_msg *reply; |
2138 | struct rb_node *p; | 2139 | struct rb_node *p; |
2139 | int err; | 2140 | int err = -ENOMEM; |
2140 | struct ceph_pagelist *pagelist; | 2141 | struct ceph_pagelist *pagelist; |
2141 | 2142 | ||
2142 | pr_info("reconnect to recovering mds%d\n", mds); | 2143 | pr_info("reconnect to recovering mds%d\n", mds); |
@@ -2185,7 +2186,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, int mds) | |||
2185 | goto fail; | 2186 | goto fail; |
2186 | err = iterate_session_caps(session, encode_caps_cb, pagelist); | 2187 | err = iterate_session_caps(session, encode_caps_cb, pagelist); |
2187 | if (err < 0) | 2188 | if (err < 0) |
2188 | goto out; | 2189 | goto fail; |
2189 | 2190 | ||
2190 | /* | 2191 | /* |
2191 | * snaprealms. we provide mds with the ino, seq (version), and | 2192 | * snaprealms. we provide mds with the ino, seq (version), and |
@@ -2213,28 +2214,31 @@ send: | |||
2213 | reply->nr_pages = calc_pages_for(0, pagelist->length); | 2214 | reply->nr_pages = calc_pages_for(0, pagelist->length); |
2214 | ceph_con_send(&session->s_con, reply); | 2215 | ceph_con_send(&session->s_con, reply); |
2215 | 2216 | ||
2216 | if (session) { | 2217 | session->s_state = CEPH_MDS_SESSION_OPEN; |
2217 | session->s_state = CEPH_MDS_SESSION_OPEN; | 2218 | mutex_unlock(&session->s_mutex); |
2218 | __wake_requests(mdsc, &session->s_waiting); | 2219 | |
2219 | } | 2220 | mutex_lock(&mdsc->mutex); |
2221 | __wake_requests(mdsc, &session->s_waiting); | ||
2222 | mutex_unlock(&mdsc->mutex); | ||
2223 | |||
2224 | ceph_put_mds_session(session); | ||
2220 | 2225 | ||
2221 | out: | ||
2222 | up_read(&mdsc->snap_rwsem); | 2226 | up_read(&mdsc->snap_rwsem); |
2223 | if (session) { | ||
2224 | mutex_unlock(&session->s_mutex); | ||
2225 | ceph_put_mds_session(session); | ||
2226 | } | ||
2227 | mutex_lock(&mdsc->mutex); | 2227 | mutex_lock(&mdsc->mutex); |
2228 | return; | 2228 | return; |
2229 | 2229 | ||
2230 | fail: | 2230 | fail: |
2231 | ceph_msg_put(reply); | 2231 | ceph_msg_put(reply); |
2232 | up_read(&mdsc->snap_rwsem); | ||
2233 | mutex_unlock(&session->s_mutex); | ||
2234 | ceph_put_mds_session(session); | ||
2232 | fail_nomsg: | 2235 | fail_nomsg: |
2233 | ceph_pagelist_release(pagelist); | 2236 | ceph_pagelist_release(pagelist); |
2234 | kfree(pagelist); | 2237 | kfree(pagelist); |
2235 | fail_nopagelist: | 2238 | fail_nopagelist: |
2236 | pr_err("ENOMEM preparing reconnect for mds%d\n", mds); | 2239 | pr_err("error %d preparing reconnect for mds%d\n", err, mds); |
2237 | goto out; | 2240 | mutex_lock(&mdsc->mutex); |
2241 | return; | ||
2238 | } | 2242 | } |
2239 | 2243 | ||
2240 | 2244 | ||
diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c index 509f57d9ccb3..cd4fadb6491a 100644 --- a/fs/ceph/messenger.c +++ b/fs/ceph/messenger.c | |||
@@ -492,7 +492,14 @@ static void prepare_write_message(struct ceph_connection *con) | |||
492 | list_move_tail(&m->list_head, &con->out_sent); | 492 | list_move_tail(&m->list_head, &con->out_sent); |
493 | } | 493 | } |
494 | 494 | ||
495 | m->hdr.seq = cpu_to_le64(++con->out_seq); | 495 | /* |
496 | * only assign outgoing seq # if we haven't sent this message | ||
497 | * yet. if it is requeued, resend with its original seq. | ||
498 | */ | ||
499 | if (m->needs_out_seq) { | ||
500 | m->hdr.seq = cpu_to_le64(++con->out_seq); | ||
501 | m->needs_out_seq = false; | ||
502 | } | ||
496 | 503 | ||
497 | dout("prepare_write_message %p seq %lld type %d len %d+%d+%d %d pgs\n", | 504 | dout("prepare_write_message %p seq %lld type %d len %d+%d+%d %d pgs\n", |
498 | m, con->out_seq, le16_to_cpu(m->hdr.type), | 505 | m, con->out_seq, le16_to_cpu(m->hdr.type), |
@@ -1986,6 +1993,8 @@ void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg) | |||
1986 | 1993 | ||
1987 | BUG_ON(msg->front.iov_len != le32_to_cpu(msg->hdr.front_len)); | 1994 | BUG_ON(msg->front.iov_len != le32_to_cpu(msg->hdr.front_len)); |
1988 | 1995 | ||
1996 | msg->needs_out_seq = true; | ||
1997 | |||
1989 | /* queue */ | 1998 | /* queue */ |
1990 | mutex_lock(&con->mutex); | 1999 | mutex_lock(&con->mutex); |
1991 | BUG_ON(!list_empty(&msg->list_head)); | 2000 | BUG_ON(!list_empty(&msg->list_head)); |
@@ -2085,15 +2094,19 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, | |||
2085 | kref_init(&m->kref); | 2094 | kref_init(&m->kref); |
2086 | INIT_LIST_HEAD(&m->list_head); | 2095 | INIT_LIST_HEAD(&m->list_head); |
2087 | 2096 | ||
2097 | m->hdr.tid = 0; | ||
2088 | m->hdr.type = cpu_to_le16(type); | 2098 | m->hdr.type = cpu_to_le16(type); |
2099 | m->hdr.priority = cpu_to_le16(CEPH_MSG_PRIO_DEFAULT); | ||
2100 | m->hdr.version = 0; | ||
2089 | m->hdr.front_len = cpu_to_le32(front_len); | 2101 | m->hdr.front_len = cpu_to_le32(front_len); |
2090 | m->hdr.middle_len = 0; | 2102 | m->hdr.middle_len = 0; |
2091 | m->hdr.data_len = cpu_to_le32(page_len); | 2103 | m->hdr.data_len = cpu_to_le32(page_len); |
2092 | m->hdr.data_off = cpu_to_le16(page_off); | 2104 | m->hdr.data_off = cpu_to_le16(page_off); |
2093 | m->hdr.priority = cpu_to_le16(CEPH_MSG_PRIO_DEFAULT); | 2105 | m->hdr.reserved = 0; |
2094 | m->footer.front_crc = 0; | 2106 | m->footer.front_crc = 0; |
2095 | m->footer.middle_crc = 0; | 2107 | m->footer.middle_crc = 0; |
2096 | m->footer.data_crc = 0; | 2108 | m->footer.data_crc = 0; |
2109 | m->footer.flags = 0; | ||
2097 | m->front_max = front_len; | 2110 | m->front_max = front_len; |
2098 | m->front_is_vmalloc = false; | 2111 | m->front_is_vmalloc = false; |
2099 | m->more_to_follow = false; | 2112 | m->more_to_follow = false; |
diff --git a/fs/ceph/messenger.h b/fs/ceph/messenger.h index a343dae73cdc..a5caf91cc971 100644 --- a/fs/ceph/messenger.h +++ b/fs/ceph/messenger.h | |||
@@ -86,6 +86,7 @@ struct ceph_msg { | |||
86 | struct kref kref; | 86 | struct kref kref; |
87 | bool front_is_vmalloc; | 87 | bool front_is_vmalloc; |
88 | bool more_to_follow; | 88 | bool more_to_follow; |
89 | bool needs_out_seq; | ||
89 | int front_max; | 90 | int front_max; |
90 | 91 | ||
91 | struct ceph_msgpool *pool; | 92 | struct ceph_msgpool *pool; |
diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c index c7b4dedaace6..3514f71ff85f 100644 --- a/fs/ceph/osd_client.c +++ b/fs/ceph/osd_client.c | |||
@@ -565,7 +565,8 @@ static int __map_osds(struct ceph_osd_client *osdc, | |||
565 | { | 565 | { |
566 | struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base; | 566 | struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base; |
567 | struct ceph_pg pgid; | 567 | struct ceph_pg pgid; |
568 | int o = -1; | 568 | int acting[CEPH_PG_MAX_SIZE]; |
569 | int o = -1, num = 0; | ||
569 | int err; | 570 | int err; |
570 | 571 | ||
571 | dout("map_osds %p tid %lld\n", req, req->r_tid); | 572 | dout("map_osds %p tid %lld\n", req, req->r_tid); |
@@ -576,10 +577,16 @@ static int __map_osds(struct ceph_osd_client *osdc, | |||
576 | pgid = reqhead->layout.ol_pgid; | 577 | pgid = reqhead->layout.ol_pgid; |
577 | req->r_pgid = pgid; | 578 | req->r_pgid = pgid; |
578 | 579 | ||
579 | o = ceph_calc_pg_primary(osdc->osdmap, pgid); | 580 | err = ceph_calc_pg_acting(osdc->osdmap, pgid, acting); |
581 | if (err > 0) { | ||
582 | o = acting[0]; | ||
583 | num = err; | ||
584 | } | ||
580 | 585 | ||
581 | if ((req->r_osd && req->r_osd->o_osd == o && | 586 | if ((req->r_osd && req->r_osd->o_osd == o && |
582 | req->r_sent >= req->r_osd->o_incarnation) || | 587 | req->r_sent >= req->r_osd->o_incarnation && |
588 | req->r_num_pg_osds == num && | ||
589 | memcmp(req->r_pg_osds, acting, sizeof(acting[0])*num) == 0) || | ||
583 | (req->r_osd == NULL && o == -1)) | 590 | (req->r_osd == NULL && o == -1)) |
584 | return 0; /* no change */ | 591 | return 0; /* no change */ |
585 | 592 | ||
@@ -587,6 +594,10 @@ static int __map_osds(struct ceph_osd_client *osdc, | |||
587 | req->r_tid, le32_to_cpu(pgid.pool), le16_to_cpu(pgid.ps), o, | 594 | req->r_tid, le32_to_cpu(pgid.pool), le16_to_cpu(pgid.ps), o, |
588 | req->r_osd ? req->r_osd->o_osd : -1); | 595 | req->r_osd ? req->r_osd->o_osd : -1); |
589 | 596 | ||
597 | /* record full pg acting set */ | ||
598 | memcpy(req->r_pg_osds, acting, sizeof(acting[0]) * num); | ||
599 | req->r_num_pg_osds = num; | ||
600 | |||
590 | if (req->r_osd) { | 601 | if (req->r_osd) { |
591 | __cancel_request(req); | 602 | __cancel_request(req); |
592 | list_del_init(&req->r_osd_item); | 603 | list_del_init(&req->r_osd_item); |
@@ -612,7 +623,7 @@ static int __map_osds(struct ceph_osd_client *osdc, | |||
612 | __remove_osd_from_lru(req->r_osd); | 623 | __remove_osd_from_lru(req->r_osd); |
613 | list_add(&req->r_osd_item, &req->r_osd->o_requests); | 624 | list_add(&req->r_osd_item, &req->r_osd->o_requests); |
614 | } | 625 | } |
615 | err = 1; /* osd changed */ | 626 | err = 1; /* osd or pg changed */ |
616 | 627 | ||
617 | out: | 628 | out: |
618 | return err; | 629 | return err; |
@@ -779,16 +790,18 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, | |||
779 | struct ceph_osd_request *req; | 790 | struct ceph_osd_request *req; |
780 | u64 tid; | 791 | u64 tid; |
781 | int numops, object_len, flags; | 792 | int numops, object_len, flags; |
793 | s32 result; | ||
782 | 794 | ||
783 | tid = le64_to_cpu(msg->hdr.tid); | 795 | tid = le64_to_cpu(msg->hdr.tid); |
784 | if (msg->front.iov_len < sizeof(*rhead)) | 796 | if (msg->front.iov_len < sizeof(*rhead)) |
785 | goto bad; | 797 | goto bad; |
786 | numops = le32_to_cpu(rhead->num_ops); | 798 | numops = le32_to_cpu(rhead->num_ops); |
787 | object_len = le32_to_cpu(rhead->object_len); | 799 | object_len = le32_to_cpu(rhead->object_len); |
800 | result = le32_to_cpu(rhead->result); | ||
788 | if (msg->front.iov_len != sizeof(*rhead) + object_len + | 801 | if (msg->front.iov_len != sizeof(*rhead) + object_len + |
789 | numops * sizeof(struct ceph_osd_op)) | 802 | numops * sizeof(struct ceph_osd_op)) |
790 | goto bad; | 803 | goto bad; |
791 | dout("handle_reply %p tid %llu\n", msg, tid); | 804 | dout("handle_reply %p tid %llu result %d\n", msg, tid, (int)result); |
792 | 805 | ||
793 | /* lookup */ | 806 | /* lookup */ |
794 | mutex_lock(&osdc->request_mutex); | 807 | mutex_lock(&osdc->request_mutex); |
@@ -834,7 +847,8 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, | |||
834 | dout("handle_reply tid %llu flags %d\n", tid, flags); | 847 | dout("handle_reply tid %llu flags %d\n", tid, flags); |
835 | 848 | ||
836 | /* either this is a read, or we got the safe response */ | 849 | /* either this is a read, or we got the safe response */ |
837 | if ((flags & CEPH_OSD_FLAG_ONDISK) || | 850 | if (result < 0 || |
851 | (flags & CEPH_OSD_FLAG_ONDISK) || | ||
838 | ((flags & CEPH_OSD_FLAG_WRITE) == 0)) | 852 | ((flags & CEPH_OSD_FLAG_WRITE) == 0)) |
839 | __unregister_request(osdc, req); | 853 | __unregister_request(osdc, req); |
840 | 854 | ||
diff --git a/fs/ceph/osd_client.h b/fs/ceph/osd_client.h index b0759911e7c3..ce776989ef6a 100644 --- a/fs/ceph/osd_client.h +++ b/fs/ceph/osd_client.h | |||
@@ -48,6 +48,8 @@ struct ceph_osd_request { | |||
48 | struct list_head r_osd_item; | 48 | struct list_head r_osd_item; |
49 | struct ceph_osd *r_osd; | 49 | struct ceph_osd *r_osd; |
50 | struct ceph_pg r_pgid; | 50 | struct ceph_pg r_pgid; |
51 | int r_pg_osds[CEPH_PG_MAX_SIZE]; | ||
52 | int r_num_pg_osds; | ||
51 | 53 | ||
52 | struct ceph_connection *r_con_filling_msg; | 54 | struct ceph_connection *r_con_filling_msg; |
53 | 55 | ||
@@ -66,7 +68,6 @@ struct ceph_osd_request { | |||
66 | struct list_head r_unsafe_item; | 68 | struct list_head r_unsafe_item; |
67 | 69 | ||
68 | struct inode *r_inode; /* for use by callbacks */ | 70 | struct inode *r_inode; /* for use by callbacks */ |
69 | struct writeback_control *r_wbc; /* ditto */ | ||
70 | 71 | ||
71 | char r_oid[40]; /* object name */ | 72 | char r_oid[40]; /* object name */ |
72 | int r_oid_len; | 73 | int r_oid_len; |
diff --git a/fs/ceph/osdmap.c b/fs/ceph/osdmap.c index 2e2c15eed82a..cfdd8f4388b7 100644 --- a/fs/ceph/osdmap.c +++ b/fs/ceph/osdmap.c | |||
@@ -1041,12 +1041,33 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid, | |||
1041 | } | 1041 | } |
1042 | 1042 | ||
1043 | /* | 1043 | /* |
1044 | * Return acting set for given pgid. | ||
1045 | */ | ||
1046 | int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid, | ||
1047 | int *acting) | ||
1048 | { | ||
1049 | int rawosds[CEPH_PG_MAX_SIZE], *osds; | ||
1050 | int i, o, num = CEPH_PG_MAX_SIZE; | ||
1051 | |||
1052 | osds = calc_pg_raw(osdmap, pgid, rawosds, &num); | ||
1053 | if (!osds) | ||
1054 | return -1; | ||
1055 | |||
1056 | /* primary is first up osd */ | ||
1057 | o = 0; | ||
1058 | for (i = 0; i < num; i++) | ||
1059 | if (ceph_osd_is_up(osdmap, osds[i])) | ||
1060 | acting[o++] = osds[i]; | ||
1061 | return o; | ||
1062 | } | ||
1063 | |||
1064 | /* | ||
1044 | * Return primary osd for given pgid, or -1 if none. | 1065 | * Return primary osd for given pgid, or -1 if none. |
1045 | */ | 1066 | */ |
1046 | int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid) | 1067 | int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid) |
1047 | { | 1068 | { |
1048 | int rawosds[10], *osds; | 1069 | int rawosds[CEPH_PG_MAX_SIZE], *osds; |
1049 | int i, num = ARRAY_SIZE(rawosds); | 1070 | int i, num = CEPH_PG_MAX_SIZE; |
1050 | 1071 | ||
1051 | osds = calc_pg_raw(osdmap, pgid, rawosds, &num); | 1072 | osds = calc_pg_raw(osdmap, pgid, rawosds, &num); |
1052 | if (!osds) | 1073 | if (!osds) |
@@ -1054,9 +1075,7 @@ int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid) | |||
1054 | 1075 | ||
1055 | /* primary is first up osd */ | 1076 | /* primary is first up osd */ |
1056 | for (i = 0; i < num; i++) | 1077 | for (i = 0; i < num; i++) |
1057 | if (ceph_osd_is_up(osdmap, osds[i])) { | 1078 | if (ceph_osd_is_up(osdmap, osds[i])) |
1058 | return osds[i]; | 1079 | return osds[i]; |
1059 | break; | ||
1060 | } | ||
1061 | return -1; | 1080 | return -1; |
1062 | } | 1081 | } |
diff --git a/fs/ceph/osdmap.h b/fs/ceph/osdmap.h index 8bc9f1e4f562..970b547e510d 100644 --- a/fs/ceph/osdmap.h +++ b/fs/ceph/osdmap.h | |||
@@ -120,6 +120,8 @@ extern int ceph_calc_object_layout(struct ceph_object_layout *ol, | |||
120 | const char *oid, | 120 | const char *oid, |
121 | struct ceph_file_layout *fl, | 121 | struct ceph_file_layout *fl, |
122 | struct ceph_osdmap *osdmap); | 122 | struct ceph_osdmap *osdmap); |
123 | extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid, | ||
124 | int *acting); | ||
123 | extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, | 125 | extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, |
124 | struct ceph_pg pgid); | 126 | struct ceph_pg pgid); |
125 | 127 | ||
diff --git a/fs/ceph/rados.h b/fs/ceph/rados.h index a1fc1d017b58..fd56451a871f 100644 --- a/fs/ceph/rados.h +++ b/fs/ceph/rados.h | |||
@@ -58,6 +58,7 @@ struct ceph_timespec { | |||
58 | #define CEPH_PG_LAYOUT_LINEAR 2 | 58 | #define CEPH_PG_LAYOUT_LINEAR 2 |
59 | #define CEPH_PG_LAYOUT_HYBRID 3 | 59 | #define CEPH_PG_LAYOUT_HYBRID 3 |
60 | 60 | ||
61 | #define CEPH_PG_MAX_SIZE 16 /* max # osds in a single pg */ | ||
61 | 62 | ||
62 | /* | 63 | /* |
63 | * placement group. | 64 | * placement group. |
diff --git a/fs/ceph/super.c b/fs/ceph/super.c index f888cf487b7c..110857ba9269 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c | |||
@@ -47,10 +47,20 @@ const char *ceph_file_part(const char *s, int len) | |||
47 | */ | 47 | */ |
48 | static void ceph_put_super(struct super_block *s) | 48 | static void ceph_put_super(struct super_block *s) |
49 | { | 49 | { |
50 | struct ceph_client *cl = ceph_client(s); | 50 | struct ceph_client *client = ceph_sb_to_client(s); |
51 | 51 | ||
52 | dout("put_super\n"); | 52 | dout("put_super\n"); |
53 | ceph_mdsc_close_sessions(&cl->mdsc); | 53 | ceph_mdsc_close_sessions(&client->mdsc); |
54 | |||
55 | /* | ||
56 | * ensure we release the bdi before put_anon_super releases | ||
57 | * the device name. | ||
58 | */ | ||
59 | if (s->s_bdi == &client->backing_dev_info) { | ||
60 | bdi_unregister(&client->backing_dev_info); | ||
61 | s->s_bdi = NULL; | ||
62 | } | ||
63 | |||
54 | return; | 64 | return; |
55 | } | 65 | } |
56 | 66 | ||
@@ -636,6 +646,8 @@ static void ceph_destroy_client(struct ceph_client *client) | |||
636 | destroy_workqueue(client->pg_inv_wq); | 646 | destroy_workqueue(client->pg_inv_wq); |
637 | destroy_workqueue(client->trunc_wq); | 647 | destroy_workqueue(client->trunc_wq); |
638 | 648 | ||
649 | bdi_destroy(&client->backing_dev_info); | ||
650 | |||
639 | if (client->msgr) | 651 | if (client->msgr) |
640 | ceph_messenger_destroy(client->msgr); | 652 | ceph_messenger_destroy(client->msgr); |
641 | mempool_destroy(client->wb_pagevec_pool); | 653 | mempool_destroy(client->wb_pagevec_pool); |
@@ -876,14 +888,14 @@ static int ceph_register_bdi(struct super_block *sb, struct ceph_client *client) | |||
876 | { | 888 | { |
877 | int err; | 889 | int err; |
878 | 890 | ||
879 | sb->s_bdi = &client->backing_dev_info; | ||
880 | |||
881 | /* set ra_pages based on rsize mount option? */ | 891 | /* set ra_pages based on rsize mount option? */ |
882 | if (client->mount_args->rsize >= PAGE_CACHE_SIZE) | 892 | if (client->mount_args->rsize >= PAGE_CACHE_SIZE) |
883 | client->backing_dev_info.ra_pages = | 893 | client->backing_dev_info.ra_pages = |
884 | (client->mount_args->rsize + PAGE_CACHE_SIZE - 1) | 894 | (client->mount_args->rsize + PAGE_CACHE_SIZE - 1) |
885 | >> PAGE_SHIFT; | 895 | >> PAGE_SHIFT; |
886 | err = bdi_register_dev(&client->backing_dev_info, sb->s_dev); | 896 | err = bdi_register_dev(&client->backing_dev_info, sb->s_dev); |
897 | if (!err) | ||
898 | sb->s_bdi = &client->backing_dev_info; | ||
887 | return err; | 899 | return err; |
888 | } | 900 | } |
889 | 901 | ||
@@ -957,9 +969,6 @@ static void ceph_kill_sb(struct super_block *s) | |||
957 | dout("kill_sb %p\n", s); | 969 | dout("kill_sb %p\n", s); |
958 | ceph_mdsc_pre_umount(&client->mdsc); | 970 | ceph_mdsc_pre_umount(&client->mdsc); |
959 | kill_anon_super(s); /* will call put_super after sb is r/o */ | 971 | kill_anon_super(s); /* will call put_super after sb is r/o */ |
960 | if (s->s_bdi == &client->backing_dev_info) | ||
961 | bdi_unregister(&client->backing_dev_info); | ||
962 | bdi_destroy(&client->backing_dev_info); | ||
963 | ceph_destroy_client(client); | 972 | ceph_destroy_client(client); |
964 | } | 973 | } |
965 | 974 | ||