diff options
| -rw-r--r-- | fs/ceph/addr.c | 6 | ||||
| -rw-r--r-- | fs/ceph/caps.c | 19 | ||||
| -rw-r--r-- | fs/ceph/inode.c | 4 | ||||
| -rw-r--r-- | fs/ceph/mds_client.c | 34 | ||||
| -rw-r--r-- | fs/ceph/messenger.c | 17 | ||||
| -rw-r--r-- | fs/ceph/messenger.h | 1 | ||||
| -rw-r--r-- | fs/ceph/osd_client.c | 26 | ||||
| -rw-r--r-- | fs/ceph/osd_client.h | 3 | ||||
| -rw-r--r-- | fs/ceph/osdmap.c | 29 | ||||
| -rw-r--r-- | fs/ceph/osdmap.h | 2 | ||||
| -rw-r--r-- | fs/ceph/rados.h | 1 | ||||
| -rw-r--r-- | fs/ceph/super.c | 23 |
12 files changed, 116 insertions, 49 deletions
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 4b42c2bb603f..a9005d862ed4 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c | |||
| @@ -504,7 +504,6 @@ static void writepages_finish(struct ceph_osd_request *req, | |||
| 504 | int i; | 504 | int i; |
| 505 | struct ceph_snap_context *snapc = req->r_snapc; | 505 | struct ceph_snap_context *snapc = req->r_snapc; |
| 506 | struct address_space *mapping = inode->i_mapping; | 506 | struct address_space *mapping = inode->i_mapping; |
| 507 | struct writeback_control *wbc = req->r_wbc; | ||
| 508 | __s32 rc = -EIO; | 507 | __s32 rc = -EIO; |
| 509 | u64 bytes = 0; | 508 | u64 bytes = 0; |
| 510 | struct ceph_client *client = ceph_inode_to_client(inode); | 509 | struct ceph_client *client = ceph_inode_to_client(inode); |
| @@ -546,10 +545,6 @@ static void writepages_finish(struct ceph_osd_request *req, | |||
| 546 | clear_bdi_congested(&client->backing_dev_info, | 545 | clear_bdi_congested(&client->backing_dev_info, |
| 547 | BLK_RW_ASYNC); | 546 | BLK_RW_ASYNC); |
| 548 | 547 | ||
| 549 | if (i >= wrote) { | ||
| 550 | dout("inode %p skipping page %p\n", inode, page); | ||
| 551 | wbc->pages_skipped++; | ||
| 552 | } | ||
| 553 | ceph_put_snap_context((void *)page->private); | 548 | ceph_put_snap_context((void *)page->private); |
| 554 | page->private = 0; | 549 | page->private = 0; |
| 555 | ClearPagePrivate(page); | 550 | ClearPagePrivate(page); |
| @@ -799,7 +794,6 @@ get_more_pages: | |||
| 799 | alloc_page_vec(client, req); | 794 | alloc_page_vec(client, req); |
| 800 | req->r_callback = writepages_finish; | 795 | req->r_callback = writepages_finish; |
| 801 | req->r_inode = inode; | 796 | req->r_inode = inode; |
| 802 | req->r_wbc = wbc; | ||
| 803 | } | 797 | } |
| 804 | 798 | ||
| 805 | /* note position of first page in pvec */ | 799 | /* note position of first page in pvec */ |
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 0c1681806867..d9400534b279 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
| @@ -858,6 +858,8 @@ static int __ceph_is_any_caps(struct ceph_inode_info *ci) | |||
| 858 | } | 858 | } |
| 859 | 859 | ||
| 860 | /* | 860 | /* |
| 861 | * Remove a cap. Take steps to deal with a racing iterate_session_caps. | ||
| 862 | * | ||
| 861 | * caller should hold i_lock. | 863 | * caller should hold i_lock. |
| 862 | * caller will not hold session s_mutex if called from destroy_inode. | 864 | * caller will not hold session s_mutex if called from destroy_inode. |
| 863 | */ | 865 | */ |
| @@ -866,15 +868,10 @@ void __ceph_remove_cap(struct ceph_cap *cap) | |||
| 866 | struct ceph_mds_session *session = cap->session; | 868 | struct ceph_mds_session *session = cap->session; |
| 867 | struct ceph_inode_info *ci = cap->ci; | 869 | struct ceph_inode_info *ci = cap->ci; |
| 868 | struct ceph_mds_client *mdsc = &ceph_client(ci->vfs_inode.i_sb)->mdsc; | 870 | struct ceph_mds_client *mdsc = &ceph_client(ci->vfs_inode.i_sb)->mdsc; |
| 871 | int removed = 0; | ||
| 869 | 872 | ||
| 870 | dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode); | 873 | dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode); |
| 871 | 874 | ||
| 872 | /* remove from inode list */ | ||
| 873 | rb_erase(&cap->ci_node, &ci->i_caps); | ||
| 874 | cap->ci = NULL; | ||
| 875 | if (ci->i_auth_cap == cap) | ||
| 876 | ci->i_auth_cap = NULL; | ||
| 877 | |||
| 878 | /* remove from session list */ | 875 | /* remove from session list */ |
| 879 | spin_lock(&session->s_cap_lock); | 876 | spin_lock(&session->s_cap_lock); |
| 880 | if (session->s_cap_iterator == cap) { | 877 | if (session->s_cap_iterator == cap) { |
| @@ -885,10 +882,18 @@ void __ceph_remove_cap(struct ceph_cap *cap) | |||
| 885 | list_del_init(&cap->session_caps); | 882 | list_del_init(&cap->session_caps); |
| 886 | session->s_nr_caps--; | 883 | session->s_nr_caps--; |
| 887 | cap->session = NULL; | 884 | cap->session = NULL; |
| 885 | removed = 1; | ||
| 888 | } | 886 | } |
| 887 | /* protect backpointer with s_cap_lock: see iterate_session_caps */ | ||
| 888 | cap->ci = NULL; | ||
| 889 | spin_unlock(&session->s_cap_lock); | 889 | spin_unlock(&session->s_cap_lock); |
| 890 | 890 | ||
| 891 | if (cap->session == NULL) | 891 | /* remove from inode list */ |
| 892 | rb_erase(&cap->ci_node, &ci->i_caps); | ||
| 893 | if (ci->i_auth_cap == cap) | ||
| 894 | ci->i_auth_cap = NULL; | ||
| 895 | |||
| 896 | if (removed) | ||
| 892 | ceph_put_cap(cap); | 897 | ceph_put_cap(cap); |
| 893 | 898 | ||
| 894 | if (!__ceph_is_any_caps(ci) && ci->i_snap_realm) { | 899 | if (!__ceph_is_any_caps(ci) && ci->i_snap_realm) { |
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 261f3e6c0bcf..85b4d2ffdeba 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c | |||
| @@ -733,6 +733,10 @@ no_change: | |||
| 733 | __ceph_get_fmode(ci, cap_fmode); | 733 | __ceph_get_fmode(ci, cap_fmode); |
| 734 | spin_unlock(&inode->i_lock); | 734 | spin_unlock(&inode->i_lock); |
| 735 | } | 735 | } |
| 736 | } else if (cap_fmode >= 0) { | ||
| 737 | pr_warning("mds issued no caps on %llx.%llx\n", | ||
| 738 | ceph_vinop(inode)); | ||
| 739 | __ceph_get_fmode(ci, cap_fmode); | ||
| 736 | } | 740 | } |
| 737 | 741 | ||
| 738 | /* update delegation info? */ | 742 | /* update delegation info? */ |
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 60a9a4ae47be..24561a557e01 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c | |||
| @@ -736,9 +736,10 @@ static void cleanup_cap_releases(struct ceph_mds_session *session) | |||
| 736 | } | 736 | } |
| 737 | 737 | ||
| 738 | /* | 738 | /* |
| 739 | * Helper to safely iterate over all caps associated with a session. | 739 | * Helper to safely iterate over all caps associated with a session, with |
| 740 | * special care taken to handle a racing __ceph_remove_cap(). | ||
| 740 | * | 741 | * |
| 741 | * caller must hold session s_mutex | 742 | * Caller must hold session s_mutex. |
| 742 | */ | 743 | */ |
| 743 | static int iterate_session_caps(struct ceph_mds_session *session, | 744 | static int iterate_session_caps(struct ceph_mds_session *session, |
| 744 | int (*cb)(struct inode *, struct ceph_cap *, | 745 | int (*cb)(struct inode *, struct ceph_cap *, |
| @@ -2136,7 +2137,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, int mds) | |||
| 2136 | struct ceph_mds_session *session = NULL; | 2137 | struct ceph_mds_session *session = NULL; |
| 2137 | struct ceph_msg *reply; | 2138 | struct ceph_msg *reply; |
| 2138 | struct rb_node *p; | 2139 | struct rb_node *p; |
| 2139 | int err; | 2140 | int err = -ENOMEM; |
| 2140 | struct ceph_pagelist *pagelist; | 2141 | struct ceph_pagelist *pagelist; |
| 2141 | 2142 | ||
| 2142 | pr_info("reconnect to recovering mds%d\n", mds); | 2143 | pr_info("reconnect to recovering mds%d\n", mds); |
| @@ -2185,7 +2186,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, int mds) | |||
| 2185 | goto fail; | 2186 | goto fail; |
| 2186 | err = iterate_session_caps(session, encode_caps_cb, pagelist); | 2187 | err = iterate_session_caps(session, encode_caps_cb, pagelist); |
| 2187 | if (err < 0) | 2188 | if (err < 0) |
| 2188 | goto out; | 2189 | goto fail; |
| 2189 | 2190 | ||
| 2190 | /* | 2191 | /* |
| 2191 | * snaprealms. we provide mds with the ino, seq (version), and | 2192 | * snaprealms. we provide mds with the ino, seq (version), and |
| @@ -2213,28 +2214,31 @@ send: | |||
| 2213 | reply->nr_pages = calc_pages_for(0, pagelist->length); | 2214 | reply->nr_pages = calc_pages_for(0, pagelist->length); |
| 2214 | ceph_con_send(&session->s_con, reply); | 2215 | ceph_con_send(&session->s_con, reply); |
| 2215 | 2216 | ||
| 2216 | if (session) { | 2217 | session->s_state = CEPH_MDS_SESSION_OPEN; |
| 2217 | session->s_state = CEPH_MDS_SESSION_OPEN; | 2218 | mutex_unlock(&session->s_mutex); |
| 2218 | __wake_requests(mdsc, &session->s_waiting); | 2219 | |
| 2219 | } | 2220 | mutex_lock(&mdsc->mutex); |
| 2221 | __wake_requests(mdsc, &session->s_waiting); | ||
| 2222 | mutex_unlock(&mdsc->mutex); | ||
| 2223 | |||
| 2224 | ceph_put_mds_session(session); | ||
| 2220 | 2225 | ||
| 2221 | out: | ||
| 2222 | up_read(&mdsc->snap_rwsem); | 2226 | up_read(&mdsc->snap_rwsem); |
| 2223 | if (session) { | ||
| 2224 | mutex_unlock(&session->s_mutex); | ||
| 2225 | ceph_put_mds_session(session); | ||
| 2226 | } | ||
| 2227 | mutex_lock(&mdsc->mutex); | 2227 | mutex_lock(&mdsc->mutex); |
| 2228 | return; | 2228 | return; |
| 2229 | 2229 | ||
| 2230 | fail: | 2230 | fail: |
| 2231 | ceph_msg_put(reply); | 2231 | ceph_msg_put(reply); |
| 2232 | up_read(&mdsc->snap_rwsem); | ||
| 2233 | mutex_unlock(&session->s_mutex); | ||
| 2234 | ceph_put_mds_session(session); | ||
| 2232 | fail_nomsg: | 2235 | fail_nomsg: |
| 2233 | ceph_pagelist_release(pagelist); | 2236 | ceph_pagelist_release(pagelist); |
| 2234 | kfree(pagelist); | 2237 | kfree(pagelist); |
| 2235 | fail_nopagelist: | 2238 | fail_nopagelist: |
| 2236 | pr_err("ENOMEM preparing reconnect for mds%d\n", mds); | 2239 | pr_err("error %d preparing reconnect for mds%d\n", err, mds); |
| 2237 | goto out; | 2240 | mutex_lock(&mdsc->mutex); |
| 2241 | return; | ||
| 2238 | } | 2242 | } |
| 2239 | 2243 | ||
| 2240 | 2244 | ||
diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c index 509f57d9ccb3..cd4fadb6491a 100644 --- a/fs/ceph/messenger.c +++ b/fs/ceph/messenger.c | |||
| @@ -492,7 +492,14 @@ static void prepare_write_message(struct ceph_connection *con) | |||
| 492 | list_move_tail(&m->list_head, &con->out_sent); | 492 | list_move_tail(&m->list_head, &con->out_sent); |
| 493 | } | 493 | } |
| 494 | 494 | ||
| 495 | m->hdr.seq = cpu_to_le64(++con->out_seq); | 495 | /* |
| 496 | * only assign outgoing seq # if we haven't sent this message | ||
| 497 | * yet. if it is requeued, resend with it's original seq. | ||
| 498 | */ | ||
| 499 | if (m->needs_out_seq) { | ||
| 500 | m->hdr.seq = cpu_to_le64(++con->out_seq); | ||
| 501 | m->needs_out_seq = false; | ||
| 502 | } | ||
| 496 | 503 | ||
| 497 | dout("prepare_write_message %p seq %lld type %d len %d+%d+%d %d pgs\n", | 504 | dout("prepare_write_message %p seq %lld type %d len %d+%d+%d %d pgs\n", |
| 498 | m, con->out_seq, le16_to_cpu(m->hdr.type), | 505 | m, con->out_seq, le16_to_cpu(m->hdr.type), |
| @@ -1986,6 +1993,8 @@ void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg) | |||
| 1986 | 1993 | ||
| 1987 | BUG_ON(msg->front.iov_len != le32_to_cpu(msg->hdr.front_len)); | 1994 | BUG_ON(msg->front.iov_len != le32_to_cpu(msg->hdr.front_len)); |
| 1988 | 1995 | ||
| 1996 | msg->needs_out_seq = true; | ||
| 1997 | |||
| 1989 | /* queue */ | 1998 | /* queue */ |
| 1990 | mutex_lock(&con->mutex); | 1999 | mutex_lock(&con->mutex); |
| 1991 | BUG_ON(!list_empty(&msg->list_head)); | 2000 | BUG_ON(!list_empty(&msg->list_head)); |
| @@ -2085,15 +2094,19 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, | |||
| 2085 | kref_init(&m->kref); | 2094 | kref_init(&m->kref); |
| 2086 | INIT_LIST_HEAD(&m->list_head); | 2095 | INIT_LIST_HEAD(&m->list_head); |
| 2087 | 2096 | ||
| 2097 | m->hdr.tid = 0; | ||
| 2088 | m->hdr.type = cpu_to_le16(type); | 2098 | m->hdr.type = cpu_to_le16(type); |
| 2099 | m->hdr.priority = cpu_to_le16(CEPH_MSG_PRIO_DEFAULT); | ||
| 2100 | m->hdr.version = 0; | ||
| 2089 | m->hdr.front_len = cpu_to_le32(front_len); | 2101 | m->hdr.front_len = cpu_to_le32(front_len); |
| 2090 | m->hdr.middle_len = 0; | 2102 | m->hdr.middle_len = 0; |
| 2091 | m->hdr.data_len = cpu_to_le32(page_len); | 2103 | m->hdr.data_len = cpu_to_le32(page_len); |
| 2092 | m->hdr.data_off = cpu_to_le16(page_off); | 2104 | m->hdr.data_off = cpu_to_le16(page_off); |
| 2093 | m->hdr.priority = cpu_to_le16(CEPH_MSG_PRIO_DEFAULT); | 2105 | m->hdr.reserved = 0; |
| 2094 | m->footer.front_crc = 0; | 2106 | m->footer.front_crc = 0; |
| 2095 | m->footer.middle_crc = 0; | 2107 | m->footer.middle_crc = 0; |
| 2096 | m->footer.data_crc = 0; | 2108 | m->footer.data_crc = 0; |
| 2109 | m->footer.flags = 0; | ||
| 2097 | m->front_max = front_len; | 2110 | m->front_max = front_len; |
| 2098 | m->front_is_vmalloc = false; | 2111 | m->front_is_vmalloc = false; |
| 2099 | m->more_to_follow = false; | 2112 | m->more_to_follow = false; |
diff --git a/fs/ceph/messenger.h b/fs/ceph/messenger.h index a343dae73cdc..a5caf91cc971 100644 --- a/fs/ceph/messenger.h +++ b/fs/ceph/messenger.h | |||
| @@ -86,6 +86,7 @@ struct ceph_msg { | |||
| 86 | struct kref kref; | 86 | struct kref kref; |
| 87 | bool front_is_vmalloc; | 87 | bool front_is_vmalloc; |
| 88 | bool more_to_follow; | 88 | bool more_to_follow; |
| 89 | bool needs_out_seq; | ||
| 89 | int front_max; | 90 | int front_max; |
| 90 | 91 | ||
| 91 | struct ceph_msgpool *pool; | 92 | struct ceph_msgpool *pool; |
diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c index c7b4dedaace6..3514f71ff85f 100644 --- a/fs/ceph/osd_client.c +++ b/fs/ceph/osd_client.c | |||
| @@ -565,7 +565,8 @@ static int __map_osds(struct ceph_osd_client *osdc, | |||
| 565 | { | 565 | { |
| 566 | struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base; | 566 | struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base; |
| 567 | struct ceph_pg pgid; | 567 | struct ceph_pg pgid; |
| 568 | int o = -1; | 568 | int acting[CEPH_PG_MAX_SIZE]; |
| 569 | int o = -1, num = 0; | ||
| 569 | int err; | 570 | int err; |
| 570 | 571 | ||
| 571 | dout("map_osds %p tid %lld\n", req, req->r_tid); | 572 | dout("map_osds %p tid %lld\n", req, req->r_tid); |
| @@ -576,10 +577,16 @@ static int __map_osds(struct ceph_osd_client *osdc, | |||
| 576 | pgid = reqhead->layout.ol_pgid; | 577 | pgid = reqhead->layout.ol_pgid; |
| 577 | req->r_pgid = pgid; | 578 | req->r_pgid = pgid; |
| 578 | 579 | ||
| 579 | o = ceph_calc_pg_primary(osdc->osdmap, pgid); | 580 | err = ceph_calc_pg_acting(osdc->osdmap, pgid, acting); |
| 581 | if (err > 0) { | ||
| 582 | o = acting[0]; | ||
| 583 | num = err; | ||
| 584 | } | ||
| 580 | 585 | ||
| 581 | if ((req->r_osd && req->r_osd->o_osd == o && | 586 | if ((req->r_osd && req->r_osd->o_osd == o && |
| 582 | req->r_sent >= req->r_osd->o_incarnation) || | 587 | req->r_sent >= req->r_osd->o_incarnation && |
| 588 | req->r_num_pg_osds == num && | ||
| 589 | memcmp(req->r_pg_osds, acting, sizeof(acting[0])*num) == 0) || | ||
| 583 | (req->r_osd == NULL && o == -1)) | 590 | (req->r_osd == NULL && o == -1)) |
| 584 | return 0; /* no change */ | 591 | return 0; /* no change */ |
| 585 | 592 | ||
| @@ -587,6 +594,10 @@ static int __map_osds(struct ceph_osd_client *osdc, | |||
| 587 | req->r_tid, le32_to_cpu(pgid.pool), le16_to_cpu(pgid.ps), o, | 594 | req->r_tid, le32_to_cpu(pgid.pool), le16_to_cpu(pgid.ps), o, |
| 588 | req->r_osd ? req->r_osd->o_osd : -1); | 595 | req->r_osd ? req->r_osd->o_osd : -1); |
| 589 | 596 | ||
| 597 | /* record full pg acting set */ | ||
| 598 | memcpy(req->r_pg_osds, acting, sizeof(acting[0]) * num); | ||
| 599 | req->r_num_pg_osds = num; | ||
| 600 | |||
| 590 | if (req->r_osd) { | 601 | if (req->r_osd) { |
| 591 | __cancel_request(req); | 602 | __cancel_request(req); |
| 592 | list_del_init(&req->r_osd_item); | 603 | list_del_init(&req->r_osd_item); |
| @@ -612,7 +623,7 @@ static int __map_osds(struct ceph_osd_client *osdc, | |||
| 612 | __remove_osd_from_lru(req->r_osd); | 623 | __remove_osd_from_lru(req->r_osd); |
| 613 | list_add(&req->r_osd_item, &req->r_osd->o_requests); | 624 | list_add(&req->r_osd_item, &req->r_osd->o_requests); |
| 614 | } | 625 | } |
| 615 | err = 1; /* osd changed */ | 626 | err = 1; /* osd or pg changed */ |
| 616 | 627 | ||
| 617 | out: | 628 | out: |
| 618 | return err; | 629 | return err; |
| @@ -779,16 +790,18 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, | |||
| 779 | struct ceph_osd_request *req; | 790 | struct ceph_osd_request *req; |
| 780 | u64 tid; | 791 | u64 tid; |
| 781 | int numops, object_len, flags; | 792 | int numops, object_len, flags; |
| 793 | s32 result; | ||
| 782 | 794 | ||
| 783 | tid = le64_to_cpu(msg->hdr.tid); | 795 | tid = le64_to_cpu(msg->hdr.tid); |
| 784 | if (msg->front.iov_len < sizeof(*rhead)) | 796 | if (msg->front.iov_len < sizeof(*rhead)) |
| 785 | goto bad; | 797 | goto bad; |
| 786 | numops = le32_to_cpu(rhead->num_ops); | 798 | numops = le32_to_cpu(rhead->num_ops); |
| 787 | object_len = le32_to_cpu(rhead->object_len); | 799 | object_len = le32_to_cpu(rhead->object_len); |
| 800 | result = le32_to_cpu(rhead->result); | ||
| 788 | if (msg->front.iov_len != sizeof(*rhead) + object_len + | 801 | if (msg->front.iov_len != sizeof(*rhead) + object_len + |
| 789 | numops * sizeof(struct ceph_osd_op)) | 802 | numops * sizeof(struct ceph_osd_op)) |
| 790 | goto bad; | 803 | goto bad; |
| 791 | dout("handle_reply %p tid %llu\n", msg, tid); | 804 | dout("handle_reply %p tid %llu result %d\n", msg, tid, (int)result); |
| 792 | 805 | ||
| 793 | /* lookup */ | 806 | /* lookup */ |
| 794 | mutex_lock(&osdc->request_mutex); | 807 | mutex_lock(&osdc->request_mutex); |
| @@ -834,7 +847,8 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, | |||
| 834 | dout("handle_reply tid %llu flags %d\n", tid, flags); | 847 | dout("handle_reply tid %llu flags %d\n", tid, flags); |
| 835 | 848 | ||
| 836 | /* either this is a read, or we got the safe response */ | 849 | /* either this is a read, or we got the safe response */ |
| 837 | if ((flags & CEPH_OSD_FLAG_ONDISK) || | 850 | if (result < 0 || |
| 851 | (flags & CEPH_OSD_FLAG_ONDISK) || | ||
| 838 | ((flags & CEPH_OSD_FLAG_WRITE) == 0)) | 852 | ((flags & CEPH_OSD_FLAG_WRITE) == 0)) |
| 839 | __unregister_request(osdc, req); | 853 | __unregister_request(osdc, req); |
| 840 | 854 | ||
diff --git a/fs/ceph/osd_client.h b/fs/ceph/osd_client.h index b0759911e7c3..ce776989ef6a 100644 --- a/fs/ceph/osd_client.h +++ b/fs/ceph/osd_client.h | |||
| @@ -48,6 +48,8 @@ struct ceph_osd_request { | |||
| 48 | struct list_head r_osd_item; | 48 | struct list_head r_osd_item; |
| 49 | struct ceph_osd *r_osd; | 49 | struct ceph_osd *r_osd; |
| 50 | struct ceph_pg r_pgid; | 50 | struct ceph_pg r_pgid; |
| 51 | int r_pg_osds[CEPH_PG_MAX_SIZE]; | ||
| 52 | int r_num_pg_osds; | ||
| 51 | 53 | ||
| 52 | struct ceph_connection *r_con_filling_msg; | 54 | struct ceph_connection *r_con_filling_msg; |
| 53 | 55 | ||
| @@ -66,7 +68,6 @@ struct ceph_osd_request { | |||
| 66 | struct list_head r_unsafe_item; | 68 | struct list_head r_unsafe_item; |
| 67 | 69 | ||
| 68 | struct inode *r_inode; /* for use by callbacks */ | 70 | struct inode *r_inode; /* for use by callbacks */ |
| 69 | struct writeback_control *r_wbc; /* ditto */ | ||
| 70 | 71 | ||
| 71 | char r_oid[40]; /* object name */ | 72 | char r_oid[40]; /* object name */ |
| 72 | int r_oid_len; | 73 | int r_oid_len; |
diff --git a/fs/ceph/osdmap.c b/fs/ceph/osdmap.c index 2e2c15eed82a..cfdd8f4388b7 100644 --- a/fs/ceph/osdmap.c +++ b/fs/ceph/osdmap.c | |||
| @@ -1041,12 +1041,33 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid, | |||
| 1041 | } | 1041 | } |
| 1042 | 1042 | ||
| 1043 | /* | 1043 | /* |
| 1044 | * Return acting set for given pgid. | ||
| 1045 | */ | ||
| 1046 | int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid, | ||
| 1047 | int *acting) | ||
| 1048 | { | ||
| 1049 | int rawosds[CEPH_PG_MAX_SIZE], *osds; | ||
| 1050 | int i, o, num = CEPH_PG_MAX_SIZE; | ||
| 1051 | |||
| 1052 | osds = calc_pg_raw(osdmap, pgid, rawosds, &num); | ||
| 1053 | if (!osds) | ||
| 1054 | return -1; | ||
| 1055 | |||
| 1056 | /* primary is first up osd */ | ||
| 1057 | o = 0; | ||
| 1058 | for (i = 0; i < num; i++) | ||
| 1059 | if (ceph_osd_is_up(osdmap, osds[i])) | ||
| 1060 | acting[o++] = osds[i]; | ||
| 1061 | return o; | ||
| 1062 | } | ||
| 1063 | |||
| 1064 | /* | ||
| 1044 | * Return primary osd for given pgid, or -1 if none. | 1065 | * Return primary osd for given pgid, or -1 if none. |
| 1045 | */ | 1066 | */ |
| 1046 | int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid) | 1067 | int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid) |
| 1047 | { | 1068 | { |
| 1048 | int rawosds[10], *osds; | 1069 | int rawosds[CEPH_PG_MAX_SIZE], *osds; |
| 1049 | int i, num = ARRAY_SIZE(rawosds); | 1070 | int i, num = CEPH_PG_MAX_SIZE; |
| 1050 | 1071 | ||
| 1051 | osds = calc_pg_raw(osdmap, pgid, rawosds, &num); | 1072 | osds = calc_pg_raw(osdmap, pgid, rawosds, &num); |
| 1052 | if (!osds) | 1073 | if (!osds) |
| @@ -1054,9 +1075,7 @@ int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid) | |||
| 1054 | 1075 | ||
| 1055 | /* primary is first up osd */ | 1076 | /* primary is first up osd */ |
| 1056 | for (i = 0; i < num; i++) | 1077 | for (i = 0; i < num; i++) |
| 1057 | if (ceph_osd_is_up(osdmap, osds[i])) { | 1078 | if (ceph_osd_is_up(osdmap, osds[i])) |
| 1058 | return osds[i]; | 1079 | return osds[i]; |
| 1059 | break; | ||
| 1060 | } | ||
| 1061 | return -1; | 1080 | return -1; |
| 1062 | } | 1081 | } |
diff --git a/fs/ceph/osdmap.h b/fs/ceph/osdmap.h index 8bc9f1e4f562..970b547e510d 100644 --- a/fs/ceph/osdmap.h +++ b/fs/ceph/osdmap.h | |||
| @@ -120,6 +120,8 @@ extern int ceph_calc_object_layout(struct ceph_object_layout *ol, | |||
| 120 | const char *oid, | 120 | const char *oid, |
| 121 | struct ceph_file_layout *fl, | 121 | struct ceph_file_layout *fl, |
| 122 | struct ceph_osdmap *osdmap); | 122 | struct ceph_osdmap *osdmap); |
| 123 | extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid, | ||
| 124 | int *acting); | ||
| 123 | extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, | 125 | extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, |
| 124 | struct ceph_pg pgid); | 126 | struct ceph_pg pgid); |
| 125 | 127 | ||
diff --git a/fs/ceph/rados.h b/fs/ceph/rados.h index a1fc1d017b58..fd56451a871f 100644 --- a/fs/ceph/rados.h +++ b/fs/ceph/rados.h | |||
| @@ -58,6 +58,7 @@ struct ceph_timespec { | |||
| 58 | #define CEPH_PG_LAYOUT_LINEAR 2 | 58 | #define CEPH_PG_LAYOUT_LINEAR 2 |
| 59 | #define CEPH_PG_LAYOUT_HYBRID 3 | 59 | #define CEPH_PG_LAYOUT_HYBRID 3 |
| 60 | 60 | ||
| 61 | #define CEPH_PG_MAX_SIZE 16 /* max # osds in a single pg */ | ||
| 61 | 62 | ||
| 62 | /* | 63 | /* |
| 63 | * placement group. | 64 | * placement group. |
diff --git a/fs/ceph/super.c b/fs/ceph/super.c index f888cf487b7c..110857ba9269 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c | |||
| @@ -47,10 +47,20 @@ const char *ceph_file_part(const char *s, int len) | |||
| 47 | */ | 47 | */ |
| 48 | static void ceph_put_super(struct super_block *s) | 48 | static void ceph_put_super(struct super_block *s) |
| 49 | { | 49 | { |
| 50 | struct ceph_client *cl = ceph_client(s); | 50 | struct ceph_client *client = ceph_sb_to_client(s); |
| 51 | 51 | ||
| 52 | dout("put_super\n"); | 52 | dout("put_super\n"); |
| 53 | ceph_mdsc_close_sessions(&cl->mdsc); | 53 | ceph_mdsc_close_sessions(&client->mdsc); |
| 54 | |||
| 55 | /* | ||
| 56 | * ensure we release the bdi before put_anon_super releases | ||
| 57 | * the device name. | ||
| 58 | */ | ||
| 59 | if (s->s_bdi == &client->backing_dev_info) { | ||
| 60 | bdi_unregister(&client->backing_dev_info); | ||
| 61 | s->s_bdi = NULL; | ||
| 62 | } | ||
| 63 | |||
| 54 | return; | 64 | return; |
| 55 | } | 65 | } |
| 56 | 66 | ||
| @@ -636,6 +646,8 @@ static void ceph_destroy_client(struct ceph_client *client) | |||
| 636 | destroy_workqueue(client->pg_inv_wq); | 646 | destroy_workqueue(client->pg_inv_wq); |
| 637 | destroy_workqueue(client->trunc_wq); | 647 | destroy_workqueue(client->trunc_wq); |
| 638 | 648 | ||
| 649 | bdi_destroy(&client->backing_dev_info); | ||
| 650 | |||
| 639 | if (client->msgr) | 651 | if (client->msgr) |
| 640 | ceph_messenger_destroy(client->msgr); | 652 | ceph_messenger_destroy(client->msgr); |
| 641 | mempool_destroy(client->wb_pagevec_pool); | 653 | mempool_destroy(client->wb_pagevec_pool); |
| @@ -876,14 +888,14 @@ static int ceph_register_bdi(struct super_block *sb, struct ceph_client *client) | |||
| 876 | { | 888 | { |
| 877 | int err; | 889 | int err; |
| 878 | 890 | ||
| 879 | sb->s_bdi = &client->backing_dev_info; | ||
| 880 | |||
| 881 | /* set ra_pages based on rsize mount option? */ | 891 | /* set ra_pages based on rsize mount option? */ |
| 882 | if (client->mount_args->rsize >= PAGE_CACHE_SIZE) | 892 | if (client->mount_args->rsize >= PAGE_CACHE_SIZE) |
| 883 | client->backing_dev_info.ra_pages = | 893 | client->backing_dev_info.ra_pages = |
| 884 | (client->mount_args->rsize + PAGE_CACHE_SIZE - 1) | 894 | (client->mount_args->rsize + PAGE_CACHE_SIZE - 1) |
| 885 | >> PAGE_SHIFT; | 895 | >> PAGE_SHIFT; |
| 886 | err = bdi_register_dev(&client->backing_dev_info, sb->s_dev); | 896 | err = bdi_register_dev(&client->backing_dev_info, sb->s_dev); |
| 897 | if (!err) | ||
| 898 | sb->s_bdi = &client->backing_dev_info; | ||
| 887 | return err; | 899 | return err; |
| 888 | } | 900 | } |
| 889 | 901 | ||
| @@ -957,9 +969,6 @@ static void ceph_kill_sb(struct super_block *s) | |||
| 957 | dout("kill_sb %p\n", s); | 969 | dout("kill_sb %p\n", s); |
| 958 | ceph_mdsc_pre_umount(&client->mdsc); | 970 | ceph_mdsc_pre_umount(&client->mdsc); |
| 959 | kill_anon_super(s); /* will call put_super after sb is r/o */ | 971 | kill_anon_super(s); /* will call put_super after sb is r/o */ |
| 960 | if (s->s_bdi == &client->backing_dev_info) | ||
| 961 | bdi_unregister(&client->backing_dev_info); | ||
| 962 | bdi_destroy(&client->backing_dev_info); | ||
| 963 | ceph_destroy_client(client); | 972 | ceph_destroy_client(client); |
| 964 | } | 973 | } |
| 965 | 974 | ||
