author    Linus Torvalds <torvalds@linux-foundation.org>    2010-05-24 10:37:52 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>    2010-05-24 10:37:52 -0400
commit    6e188240ebc2a132d70924942d7c8b9acb46e11a
tree      7628df39f9c1d60a639504faaf6b5941b2c4b4ae
parent    62a11ae3405b6da2535d28e5facc2de5af4a7e62
parent    240ed68eb567d80dd6bab739341999a5ab0ad55d
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client: (59 commits)
ceph: reuse mon subscribe message instead of allocating anew
ceph: avoid resending queued message to monitor
ceph: Storage class should be before const qualifier
ceph: all allocation functions should get gfp_mask
ceph: specify max_bytes on readdir replies
ceph: cleanup pool op strings
ceph: Use kzalloc
ceph: use common helper for aborted dir request invalidation
ceph: cope with out of order (unsafe after safe) mds reply
ceph: save peer feature bits in connection structure
ceph: resync headers with userland
ceph: use ceph. prefix for virtual xattrs
ceph: throw out dirty caps metadata, data on session teardown
ceph: attempt mds reconnect if mds closes our session
ceph: clean up send_mds_reconnect interface
ceph: wait for mds OPEN reply to indicate reconnect success
ceph: only send cap releases when mds is OPEN|HUNG
ceph: discard cap releases on mds restart
ceph: make mon client statfs handling more generic
ceph: drop src address(es) from message header [new protocol feature]
...
 fs/ceph/addr.c         |  11
 fs/ceph/auth.c         |   9
 fs/ceph/auth.h         |   2
 fs/ceph/auth_none.c    |   1
 fs/ceph/auth_x.c       |  19
 fs/ceph/caps.c         |  24
 fs/ceph/ceph_fs.h      |  62
 fs/ceph/ceph_strings.c |  16
 fs/ceph/debugfs.c      |  13
 fs/ceph/dir.c          |  45
 fs/ceph/export.c       |  14
 fs/ceph/file.c         |  16
 fs/ceph/inode.c        |  97
 fs/ceph/ioctl.c        |   2
 fs/ceph/mds_client.c   | 385
 fs/ceph/mds_client.h   |   6
 fs/ceph/messenger.c    |  91
 fs/ceph/messenger.h    |  10
 fs/ceph/mon_client.c   | 257
 fs/ceph/mon_client.h   |  27
 fs/ceph/msgpool.c      | 180
 fs/ceph/msgpool.h      |  12
 fs/ceph/msgr.h         |  21
 fs/ceph/osd_client.c   |  98
 fs/ceph/pagelist.c     |   2
 fs/ceph/rados.h        |  23
 fs/ceph/snap.c         |   2
 fs/ceph/super.c        | 125
 fs/ceph/super.h        |  30
 fs/ceph/xattr.c        |  35
 30 files changed, 876 insertions(+), 759 deletions(-)
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index a9005d862ed4..d9c60b84949a 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -274,7 +274,6 @@ static int ceph_readpages(struct file *file, struct address_space *mapping,
         struct ceph_osd_client *osdc = &ceph_inode_to_client(inode)->osdc;
         int rc = 0;
         struct page **pages;
-        struct pagevec pvec;
         loff_t offset;
         u64 len;
 
@@ -297,8 +296,6 @@ static int ceph_readpages(struct file *file, struct address_space *mapping,
         if (rc < 0)
                 goto out;
 
-        /* set uptodate and add to lru in pagevec-sized chunks */
-        pagevec_init(&pvec, 0);
         for (; !list_empty(page_list) && len > 0;
              rc -= PAGE_CACHE_SIZE, len -= PAGE_CACHE_SIZE) {
                 struct page *page =
@@ -312,7 +309,7 @@ static int ceph_readpages(struct file *file, struct address_space *mapping,
                         zero_user_segment(page, s, PAGE_CACHE_SIZE);
                 }
 
-                if (add_to_page_cache(page, mapping, page->index, GFP_NOFS)) {
+                if (add_to_page_cache_lru(page, mapping, page->index, GFP_NOFS)) {
                         page_cache_release(page);
                         dout("readpages %p add_to_page_cache failed %p\n",
                              inode, page);
@@ -323,10 +320,8 @@ static int ceph_readpages(struct file *file, struct address_space *mapping,
                 flush_dcache_page(page);
                 SetPageUptodate(page);
                 unlock_page(page);
-                if (pagevec_add(&pvec, page) == 0)
-                        pagevec_lru_add_file(&pvec);   /* add to lru */
+                page_cache_release(page);
         }
-        pagevec_lru_add_file(&pvec);
         rc = 0;
 
 out:
@@ -568,7 +563,7 @@ static void writepages_finish(struct ceph_osd_request *req,
         ceph_release_pages(req->r_pages, req->r_num_pages);
         if (req->r_pages_from_pool)
                 mempool_free(req->r_pages,
-                             ceph_client(inode->i_sb)->wb_pagevec_pool);
+                             ceph_sb_to_client(inode->i_sb)->wb_pagevec_pool);
         else
                 kfree(req->r_pages);
         ceph_osdc_put_request(req);
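The readpages change above is the interesting part of this file: the old code batched LRU insertion by hand through a pagevec, while add_to_page_cache_lru() installs the page in the page cache and on the LRU in one call, leaving the caller only its own reference to drop. A minimal sketch of the resulting per-page flow (the helper name is ours, not from the patch, and it assumes the page data has already been read in, as it has at this point in ceph_readpages):

    /* Illustrative helper: finish one read page.  On success,
     * add_to_page_cache_lru() leaves the cache/LRU holding their own
     * reference, so we release ours when done. */
    static int finish_read_page(struct address_space *mapping,
                                struct page *page, gfp_t gfp)
    {
            int err = add_to_page_cache_lru(page, mapping, page->index, gfp);

            if (err) {
                    page_cache_release(page);       /* lost a race; skip it */
                    return err;
            }
            flush_dcache_page(page);
            SetPageUptodate(page);
            unlock_page(page);
            page_cache_release(page);               /* drop our reference */
            return 0;
    }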
diff --git a/fs/ceph/auth.c b/fs/ceph/auth.c
index 818afe72e6c7..9f46de2ba7a7 100644
--- a/fs/ceph/auth.c
+++ b/fs/ceph/auth.c
@@ -150,7 +150,8 @@ int ceph_build_auth_request(struct ceph_auth_client *ac,
 
         ret = ac->ops->build_request(ac, p + sizeof(u32), end);
         if (ret < 0) {
-                pr_err("error %d building request\n", ret);
+                pr_err("error %d building auth method %s request\n", ret,
+                       ac->ops->name);
                 return ret;
         }
         dout(" built request %d bytes\n", ret);
@@ -216,8 +217,8 @@ int ceph_handle_auth_reply(struct ceph_auth_client *ac,
         if (ac->protocol != protocol) {
                 ret = ceph_auth_init_protocol(ac, protocol);
                 if (ret) {
-                        pr_err("error %d on auth protocol %d init\n",
-                               ret, protocol);
+                        pr_err("error %d on auth method %s init\n",
+                               ret, ac->ops->name);
                         goto out;
                 }
         }
@@ -229,7 +230,7 @@ int ceph_handle_auth_reply(struct ceph_auth_client *ac,
         if (ret == -EAGAIN) {
                 return ceph_build_auth_request(ac, reply_buf, reply_len);
         } else if (ret) {
-                pr_err("authentication error %d\n", ret);
+                pr_err("auth method '%s' error %d\n", ac->ops->name, ret);
                 return ret;
         }
         return 0;
diff --git a/fs/ceph/auth.h b/fs/ceph/auth.h
index ca4f57cfb267..4429a707c021 100644
--- a/fs/ceph/auth.h
+++ b/fs/ceph/auth.h
@@ -15,6 +15,8 @@ struct ceph_auth_client;
 struct ceph_authorizer;
 
 struct ceph_auth_client_ops {
+        const char *name;
+
         /*
          * true if we are authenticated and can connect to
          * services.
diff --git a/fs/ceph/auth_none.c b/fs/ceph/auth_none.c
index 8cd9e3af07f7..24407c119291 100644
--- a/fs/ceph/auth_none.c
+++ b/fs/ceph/auth_none.c
@@ -94,6 +94,7 @@ static void ceph_auth_none_destroy_authorizer(struct ceph_auth_client *ac,
 }
 
 static const struct ceph_auth_client_ops ceph_auth_none_ops = {
+        .name = "none",
         .reset = reset,
         .destroy = destroy,
         .is_authenticated = is_authenticated,
diff --git a/fs/ceph/auth_x.c b/fs/ceph/auth_x.c
index fee5a08da881..7b206231566d 100644
--- a/fs/ceph/auth_x.c
+++ b/fs/ceph/auth_x.c
@@ -127,7 +127,7 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
         int ret;
         char *dbuf;
         char *ticket_buf;
-        u8 struct_v;
+        u8 reply_struct_v;
 
         dbuf = kmalloc(TEMP_TICKET_BUF_LEN, GFP_NOFS);
         if (!dbuf)
@@ -139,14 +139,14 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
                 goto out_dbuf;
 
         ceph_decode_need(&p, end, 1 + sizeof(u32), bad);
-        struct_v = ceph_decode_8(&p);
-        if (struct_v != 1)
+        reply_struct_v = ceph_decode_8(&p);
+        if (reply_struct_v != 1)
                 goto bad;
         num = ceph_decode_32(&p);
         dout("%d tickets\n", num);
         while (num--) {
                 int type;
-                u8 struct_v;
+                u8 tkt_struct_v, blob_struct_v;
                 struct ceph_x_ticket_handler *th;
                 void *dp, *dend;
                 int dlen;
@@ -165,8 +165,8 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
                 type = ceph_decode_32(&p);
                 dout(" ticket type %d %s\n", type, ceph_entity_type_name(type));
 
-                struct_v = ceph_decode_8(&p);
-                if (struct_v != 1)
+                tkt_struct_v = ceph_decode_8(&p);
+                if (tkt_struct_v != 1)
                         goto bad;
 
                 th = get_ticket_handler(ac, type);
@@ -186,8 +186,8 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
                 dend = dbuf + dlen;
                 dp = dbuf;
 
-                struct_v = ceph_decode_8(&dp);
-                if (struct_v != 1)
+                tkt_struct_v = ceph_decode_8(&dp);
+                if (tkt_struct_v != 1)
                         goto bad;
 
                 memcpy(&old_key, &th->session_key, sizeof(old_key));
@@ -224,7 +224,7 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
                 tpend = tp + dlen;
                 dout(" ticket blob is %d bytes\n", dlen);
                 ceph_decode_need(&tp, tpend, 1 + sizeof(u64), bad);
-                struct_v = ceph_decode_8(&tp);
+                blob_struct_v = ceph_decode_8(&tp);
                 new_secret_id = ceph_decode_64(&tp);
                 ret = ceph_decode_buffer(&new_ticket_blob, &tp, tpend);
                 if (ret)
@@ -618,6 +618,7 @@ static void ceph_x_invalidate_authorizer(struct ceph_auth_client *ac,
 
 
 static const struct ceph_auth_client_ops ceph_x_ops = {
+        .name = "x",
         .is_authenticated = ceph_x_is_authenticated,
         .build_request = ceph_x_build_request,
         .handle_reply = ceph_x_handle_reply,
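The renames above are more than cosmetic: the old code declared a second u8 struct_v inside the ticket loop, shadowing the one in the outer scope, so it was easy to test the wrong version byte without any compiler complaint. A standalone C illustration of the hazard (a generic example of ours, not from the patch):

    #include <stdio.h>

    int main(void)
    {
            int struct_v = 1;               /* outer: reply version */
            {
                    int struct_v = 2;       /* inner declaration shadows the outer */
                    printf("inner sees %d\n", struct_v);    /* prints 2 */
            }
            printf("outer still %d\n", struct_v);           /* prints 1 */
            return 0;
    }

Giving each decode target a distinct name (reply_struct_v, tkt_struct_v, blob_struct_v) removes the ambiguity.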
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index d9400534b279..0dd0b81e64f7 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -867,7 +867,8 @@ void __ceph_remove_cap(struct ceph_cap *cap)
 {
         struct ceph_mds_session *session = cap->session;
         struct ceph_inode_info *ci = cap->ci;
-        struct ceph_mds_client *mdsc = &ceph_client(ci->vfs_inode.i_sb)->mdsc;
+        struct ceph_mds_client *mdsc =
+                &ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
         int removed = 0;
 
         dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode);
@@ -937,9 +938,9 @@ static int send_cap_msg(struct ceph_mds_session *session,
              seq, issue_seq, mseq, follows, size, max_size,
              xattr_version, xattrs_buf ? (int)xattrs_buf->vec.iov_len : 0);
 
-        msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, sizeof(*fc), 0, 0, NULL);
-        if (IS_ERR(msg))
-                return PTR_ERR(msg);
+        msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, sizeof(*fc), GFP_NOFS);
+        if (!msg)
+                return -ENOMEM;
 
         msg->hdr.tid = cpu_to_le64(flush_tid);
 
@@ -1298,7 +1299,8 @@ static void ceph_flush_snaps(struct ceph_inode_info *ci)
  */
 void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
 {
-        struct ceph_mds_client *mdsc = &ceph_client(ci->vfs_inode.i_sb)->mdsc;
+        struct ceph_mds_client *mdsc =
+                &ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
         struct inode *inode = &ci->vfs_inode;
         int was = ci->i_dirty_caps;
         int dirty = 0;
@@ -1336,7 +1338,7 @@ void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
 static int __mark_caps_flushing(struct inode *inode,
                                 struct ceph_mds_session *session)
 {
-        struct ceph_mds_client *mdsc = &ceph_client(inode->i_sb)->mdsc;
+        struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc;
         struct ceph_inode_info *ci = ceph_inode(inode);
         int flushing;
 
@@ -1663,7 +1665,7 @@ ack:
 static int try_flush_caps(struct inode *inode, struct ceph_mds_session *session,
                           unsigned *flush_tid)
 {
-        struct ceph_mds_client *mdsc = &ceph_client(inode->i_sb)->mdsc;
+        struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc;
         struct ceph_inode_info *ci = ceph_inode(inode);
         int unlock_session = session ? 0 : 1;
         int flushing = 0;
@@ -1716,10 +1718,9 @@ out_unlocked:
 static int caps_are_flushed(struct inode *inode, unsigned tid)
 {
         struct ceph_inode_info *ci = ceph_inode(inode);
-        int dirty, i, ret = 1;
+        int i, ret = 1;
 
         spin_lock(&inode->i_lock);
-        dirty = __ceph_caps_dirty(ci);
         for (i = 0; i < CEPH_CAP_BITS; i++)
                 if ((ci->i_flushing_caps & (1 << i)) &&
                     ci->i_cap_flush_tid[i] <= tid) {
@@ -1829,7 +1830,8 @@ int ceph_write_inode(struct inode *inode, struct writeback_control *wbc)
                 err = wait_event_interruptible(ci->i_cap_wq,
                                        caps_are_flushed(inode, flush_tid));
         } else {
-                struct ceph_mds_client *mdsc = &ceph_client(inode->i_sb)->mdsc;
+                struct ceph_mds_client *mdsc =
+                        &ceph_sb_to_client(inode->i_sb)->mdsc;
 
                 spin_lock(&inode->i_lock);
                 if (__ceph_caps_dirty(ci))
@@ -2411,7 +2413,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
         __releases(inode->i_lock)
 {
         struct ceph_inode_info *ci = ceph_inode(inode);
-        struct ceph_mds_client *mdsc = &ceph_client(inode->i_sb)->mdsc;
+        struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc;
         unsigned seq = le32_to_cpu(m->seq);
         int dirty = le32_to_cpu(m->dirty);
         int cleaned = 0;
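A convention change repeats throughout this merge (here in send_cap_msg(), later in mds_client.c): ceph_msg_new() now takes an explicit gfp_t and signals allocation failure by returning NULL rather than an ERR_PTR-encoded error. The caller-side idiom, lifted from the hunk above:

    msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, sizeof(*fc), GFP_NOFS);
    if (!msg)
            return -ENOMEM;         /* no PTR_ERR() decoding needed */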
diff --git a/fs/ceph/ceph_fs.h b/fs/ceph/ceph_fs.h
index 0c2241ef3653..3b9eeed097b3 100644
--- a/fs/ceph/ceph_fs.h
+++ b/fs/ceph/ceph_fs.h
@@ -19,7 +19,7 @@
  * Ceph release version
  */
 #define CEPH_VERSION_MAJOR 0
-#define CEPH_VERSION_MINOR 19
+#define CEPH_VERSION_MINOR 20
 #define CEPH_VERSION_PATCH 0
 
 #define _CEPH_STRINGIFY(x) #x
@@ -36,7 +36,7 @@
  * client-facing protocol.
  */
 #define CEPH_OSD_PROTOCOL 8 /* cluster internal */
-#define CEPH_MDS_PROTOCOL 9 /* cluster internal */
+#define CEPH_MDS_PROTOCOL 12 /* cluster internal */
 #define CEPH_MON_PROTOCOL 5 /* cluster internal */
 #define CEPH_OSDC_PROTOCOL 24 /* server/client */
 #define CEPH_MDSC_PROTOCOL 32 /* server/client */
@@ -53,8 +53,18 @@
 /*
  * feature bits
  */
-#define CEPH_FEATURE_SUPPORTED 0
-#define CEPH_FEATURE_REQUIRED 0
+#define CEPH_FEATURE_UID 1
+#define CEPH_FEATURE_NOSRCADDR 2
+#define CEPH_FEATURE_FLOCK 4
+
+#define CEPH_FEATURE_SUPPORTED_MON CEPH_FEATURE_UID|CEPH_FEATURE_NOSRCADDR
+#define CEPH_FEATURE_REQUIRED_MON CEPH_FEATURE_UID
+#define CEPH_FEATURE_SUPPORTED_MDS CEPH_FEATURE_UID|CEPH_FEATURE_NOSRCADDR|CEPH_FEATURE_FLOCK
+#define CEPH_FEATURE_REQUIRED_MDS CEPH_FEATURE_UID
+#define CEPH_FEATURE_SUPPORTED_OSD CEPH_FEATURE_UID|CEPH_FEATURE_NOSRCADDR
+#define CEPH_FEATURE_REQUIRED_OSD CEPH_FEATURE_UID
+#define CEPH_FEATURE_SUPPORTED_CLIENT CEPH_FEATURE_NOSRCADDR
+#define CEPH_FEATURE_REQUIRED_CLIENT CEPH_FEATURE_NOSRCADDR
 
 
 /*
@@ -91,6 +101,8 @@ int ceph_file_layout_is_valid(const struct ceph_file_layout *layout);
 #define CEPH_AUTH_NONE 0x1
 #define CEPH_AUTH_CEPHX 0x2
 
+#define CEPH_AUTH_UID_DEFAULT ((__u64) -1)
+
 
 /*********************************************
  * message layer
@@ -128,11 +140,27 @@ int ceph_file_layout_is_valid(const struct ceph_file_layout *layout);
 #define CEPH_MSG_CLIENT_SNAP 0x312
 #define CEPH_MSG_CLIENT_CAPRELEASE 0x313
 
+/* pool ops */
+#define CEPH_MSG_POOLOP_REPLY 48
+#define CEPH_MSG_POOLOP 49
+
+
 /* osd */
 #define CEPH_MSG_OSD_MAP 41
 #define CEPH_MSG_OSD_OP 42
 #define CEPH_MSG_OSD_OPREPLY 43
 
+/* pool operations */
+enum {
+        POOL_OP_CREATE = 0x01,
+        POOL_OP_DELETE = 0x02,
+        POOL_OP_AUID_CHANGE = 0x03,
+        POOL_OP_CREATE_SNAP = 0x11,
+        POOL_OP_DELETE_SNAP = 0x12,
+        POOL_OP_CREATE_UNMANAGED_SNAP = 0x21,
+        POOL_OP_DELETE_UNMANAGED_SNAP = 0x22,
+};
+
 struct ceph_mon_request_header {
         __le64 have_version;
         __le16 session_mon;
@@ -155,6 +183,31 @@ struct ceph_mon_statfs_reply {
         struct ceph_statfs st;
 } __attribute__ ((packed));
 
+const char *ceph_pool_op_name(int op);
+
+struct ceph_mon_poolop {
+        struct ceph_mon_request_header monhdr;
+        struct ceph_fsid fsid;
+        __le32 pool;
+        __le32 op;
+        __le64 auid;
+        __le64 snapid;
+        __le32 name_len;
+} __attribute__ ((packed));
+
+struct ceph_mon_poolop_reply {
+        struct ceph_mon_request_header monhdr;
+        struct ceph_fsid fsid;
+        __le32 reply_code;
+        __le32 epoch;
+        char has_data;
+        char data[0];
+} __attribute__ ((packed));
+
+struct ceph_mon_unmanaged_snap {
+        __le64 snapid;
+} __attribute__ ((packed));
+
 struct ceph_osd_getmap {
         struct ceph_mon_request_header monhdr;
         struct ceph_fsid fsid;
@@ -308,6 +361,7 @@ union ceph_mds_request_args {
         struct {
                 __le32 frag; /* which dir fragment */
                 __le32 max_entries; /* how many dentries to grab */
+                __le32 max_bytes;
         } __attribute__ ((packed)) readdir;
         struct {
                 __le32 mode;
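The new feature bits are single-bit flags (1, 2, 4) OR'd into per-role SUPPORTED/REQUIRED masks, which is how a client can advertise NOSRCADDR while a monitor insists on UID. A hedged sketch of how such masks are typically consulted during connection negotiation (the function and parameter names are ours; the real plumbing lives in messenger.c, not this header):

    /* Illustrative only: fail the handshake if the peer lacks
     * any feature we list as required. */
    static int check_peer_features(__u64 peer_features, __u64 required)
    {
            __u64 missing = required & ~peer_features;

            if (missing)
                    return -1;      /* caller closes the connection */
            return 0;
    }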
diff --git a/fs/ceph/ceph_strings.c b/fs/ceph/ceph_strings.c
index 8e4be6a80c62..7503aee828ce 100644
--- a/fs/ceph/ceph_strings.c
+++ b/fs/ceph/ceph_strings.c
@@ -10,7 +10,6 @@ const char *ceph_entity_type_name(int type)
         case CEPH_ENTITY_TYPE_OSD: return "osd";
         case CEPH_ENTITY_TYPE_MON: return "mon";
         case CEPH_ENTITY_TYPE_CLIENT: return "client";
-        case CEPH_ENTITY_TYPE_ADMIN: return "admin";
         case CEPH_ENTITY_TYPE_AUTH: return "auth";
         default: return "unknown";
         }
@@ -45,6 +44,7 @@ const char *ceph_osd_op_name(int op)
         case CEPH_OSD_OP_SETXATTRS: return "setxattrs";
         case CEPH_OSD_OP_RESETXATTRS: return "resetxattrs";
         case CEPH_OSD_OP_RMXATTR: return "rmxattr";
+        case CEPH_OSD_OP_CMPXATTR: return "cmpxattr";
 
         case CEPH_OSD_OP_PULL: return "pull";
         case CEPH_OSD_OP_PUSH: return "push";
@@ -174,3 +174,17 @@ const char *ceph_snap_op_name(int o)
         }
         return "???";
 }
+
+const char *ceph_pool_op_name(int op)
+{
+        switch (op) {
+        case POOL_OP_CREATE: return "create";
+        case POOL_OP_DELETE: return "delete";
+        case POOL_OP_AUID_CHANGE: return "auid change";
+        case POOL_OP_CREATE_SNAP: return "create snap";
+        case POOL_OP_DELETE_SNAP: return "delete snap";
+        case POOL_OP_CREATE_UNMANAGED_SNAP: return "create unmanaged snap";
+        case POOL_OP_DELETE_UNMANAGED_SNAP: return "delete unmanaged snap";
+        }
+        return "???";
+}
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index f7048da92acc..3be33fb066cc 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -113,7 +113,7 @@ static int osdmap_show(struct seq_file *s, void *p)
 static int monc_show(struct seq_file *s, void *p)
 {
         struct ceph_client *client = s->private;
-        struct ceph_mon_statfs_request *req;
+        struct ceph_mon_generic_request *req;
         struct ceph_mon_client *monc = &client->monc;
         struct rb_node *rp;
 
@@ -126,9 +126,14 @@ static int monc_show(struct seq_file *s, void *p)
         if (monc->want_next_osdmap)
                 seq_printf(s, "want next osdmap\n");
 
-        for (rp = rb_first(&monc->statfs_request_tree); rp; rp = rb_next(rp)) {
-                req = rb_entry(rp, struct ceph_mon_statfs_request, node);
-                seq_printf(s, "%lld statfs\n", req->tid);
+        for (rp = rb_first(&monc->generic_request_tree); rp; rp = rb_next(rp)) {
+                __u16 op;
+                req = rb_entry(rp, struct ceph_mon_generic_request, node);
+                op = le16_to_cpu(req->request->hdr.type);
+                if (op == CEPH_MSG_STATFS)
+                        seq_printf(s, "%lld statfs\n", req->tid);
+                else
+                        seq_printf(s, "%lld unknown\n", req->tid);
         }
 
         mutex_unlock(&monc->mutex);
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 650d2db5ed26..4fd30900eff7 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -51,8 +51,11 @@ int ceph_init_dentry(struct dentry *dentry)
                 return -ENOMEM; /* oh well */
 
         spin_lock(&dentry->d_lock);
-        if (dentry->d_fsdata) /* lost a race */
+        if (dentry->d_fsdata) {
+                /* lost a race */
+                kmem_cache_free(ceph_dentry_cachep, di);
                 goto out_unlock;
+        }
         di->dentry = dentry;
         di->lease_session = NULL;
         dentry->d_fsdata = di;
@@ -125,7 +128,8 @@ more:
         dentry = list_entry(p, struct dentry, d_u.d_child);
         di = ceph_dentry(dentry);
         while (1) {
-                dout(" p %p/%p d_subdirs %p/%p\n", p->prev, p->next,
+                dout(" p %p/%p %s d_subdirs %p/%p\n", p->prev, p->next,
+                     d_unhashed(dentry) ? "!hashed" : "hashed",
                      parent->d_subdirs.prev, parent->d_subdirs.next);
                 if (p == &parent->d_subdirs) {
                         fi->at_end = 1;
@@ -229,6 +233,7 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir)
         u32 ftype;
         struct ceph_mds_reply_info_parsed *rinfo;
         const int max_entries = client->mount_args->max_readdir;
+        const int max_bytes = client->mount_args->max_readdir_bytes;
 
         dout("readdir %p filp %p frag %u off %u\n", inode, filp, frag, off);
         if (fi->at_end)
@@ -312,6 +317,7 @@ more:
                 req->r_readdir_offset = fi->next_offset;
                 req->r_args.readdir.frag = cpu_to_le32(frag);
                 req->r_args.readdir.max_entries = cpu_to_le32(max_entries);
+                req->r_args.readdir.max_bytes = cpu_to_le32(max_bytes);
                 req->r_num_caps = max_entries + 1;
                 err = ceph_mdsc_do_request(mdsc, NULL, req);
                 if (err < 0) {
@@ -335,7 +341,7 @@ more:
                 if (req->r_reply_info.dir_end) {
                         kfree(fi->last_name);
                         fi->last_name = NULL;
-                        fi->next_offset = 0;
+                        fi->next_offset = 2;
                 } else {
                         rinfo = &req->r_reply_info;
                         err = note_last_dentry(fi,
@@ -478,7 +484,7 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int origin)
 struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
                                   struct dentry *dentry, int err)
 {
-        struct ceph_client *client = ceph_client(dentry->d_sb);
+        struct ceph_client *client = ceph_sb_to_client(dentry->d_sb);
         struct inode *parent = dentry->d_parent->d_inode;
 
         /* .snap dir? */
@@ -568,7 +574,6 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
             !is_root_ceph_dentry(dir, dentry) &&
             (ci->i_ceph_flags & CEPH_I_COMPLETE) &&
             (__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1))) {
-                di->offset = ci->i_max_offset++;
                 spin_unlock(&dir->i_lock);
                 dout(" dir %p complete, -ENOENT\n", dir);
                 d_add(dentry, NULL);
@@ -888,13 +893,22 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
 
                 /* ensure target dentry is invalidated, despite
                    rehashing bug in vfs_rename_dir */
-                new_dentry->d_time = jiffies;
-                ceph_dentry(new_dentry)->lease_shared_gen = 0;
+                ceph_invalidate_dentry_lease(new_dentry);
         }
         ceph_mdsc_put_request(req);
         return err;
 }
 
+/*
+ * Ensure a dentry lease will no longer revalidate.
+ */
+void ceph_invalidate_dentry_lease(struct dentry *dentry)
+{
+        spin_lock(&dentry->d_lock);
+        dentry->d_time = jiffies;
+        ceph_dentry(dentry)->lease_shared_gen = 0;
+        spin_unlock(&dentry->d_lock);
+}
 
 /*
  * Check if dentry lease is valid.  If not, delete the lease.  Try to
@@ -972,8 +986,9 @@ static int ceph_d_revalidate(struct dentry *dentry, struct nameidata *nd)
 {
         struct inode *dir = dentry->d_parent->d_inode;
 
-        dout("d_revalidate %p '%.*s' inode %p\n", dentry,
-             dentry->d_name.len, dentry->d_name.name, dentry->d_inode);
+        dout("d_revalidate %p '%.*s' inode %p offset %lld\n", dentry,
+             dentry->d_name.len, dentry->d_name.name, dentry->d_inode,
+             ceph_dentry(dentry)->offset);
 
         /* always trust cached snapped dentries, snapdir dentry */
         if (ceph_snap(dir) != CEPH_NOSNAP) {
@@ -1050,7 +1065,7 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
         struct ceph_inode_info *ci = ceph_inode(inode);
         int left;
 
-        if (!ceph_test_opt(ceph_client(inode->i_sb), DIRSTAT))
+        if (!ceph_test_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT))
                 return -EISDIR;
 
         if (!cf->dir_info) {
@@ -1152,7 +1167,7 @@ void ceph_dentry_lru_add(struct dentry *dn)
         dout("dentry_lru_add %p %p '%.*s'\n", di, dn,
              dn->d_name.len, dn->d_name.name);
         if (di) {
-                mdsc = &ceph_client(dn->d_sb)->mdsc;
+                mdsc = &ceph_sb_to_client(dn->d_sb)->mdsc;
                 spin_lock(&mdsc->dentry_lru_lock);
                 list_add_tail(&di->lru, &mdsc->dentry_lru);
                 mdsc->num_dentry++;
@@ -1165,10 +1180,10 @@ void ceph_dentry_lru_touch(struct dentry *dn)
         struct ceph_dentry_info *di = ceph_dentry(dn);
         struct ceph_mds_client *mdsc;
 
-        dout("dentry_lru_touch %p %p '%.*s'\n", di, dn,
-             dn->d_name.len, dn->d_name.name);
+        dout("dentry_lru_touch %p %p '%.*s' (offset %lld)\n", di, dn,
+             dn->d_name.len, dn->d_name.name, di->offset);
         if (di) {
-                mdsc = &ceph_client(dn->d_sb)->mdsc;
+                mdsc = &ceph_sb_to_client(dn->d_sb)->mdsc;
                 spin_lock(&mdsc->dentry_lru_lock);
                 list_move_tail(&di->lru, &mdsc->dentry_lru);
                 spin_unlock(&mdsc->dentry_lru_lock);
@@ -1183,7 +1198,7 @@ void ceph_dentry_lru_del(struct dentry *dn)
         dout("dentry_lru_del %p %p '%.*s'\n", di, dn,
              dn->d_name.len, dn->d_name.name);
         if (di) {
-                mdsc = &ceph_client(dn->d_sb)->mdsc;
+                mdsc = &ceph_sb_to_client(dn->d_sb)->mdsc;
                 spin_lock(&mdsc->dentry_lru_lock);
                 list_del_init(&di->lru);
                 mdsc->num_dentry--;
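The switch from fi->next_offset = 0 to fi->next_offset = 2 lines up with inode.c seeding i_max_offset = 2 for a newly complete directory: on our reading of these hunks, readdir offsets 0 and 1 are occupied by the synthetic "." and ".." entries, so real dentries begin at offset 2. In sketch form (the comment is our interpretation, not stated in the patch):

    /* offset 0 -> "."   (synthesized)
     * offset 1 -> ".."  (synthesized)
     * offset 2 -> first real dentry in the fragment */
    fi->next_offset = 2;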
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index 9d67572fb328..17447644d675 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -93,11 +93,11 @@ static struct dentry *__fh_to_dentry(struct super_block *sb,
                 return ERR_PTR(-ESTALE);
 
         dentry = d_obtain_alias(inode);
-        if (!dentry) {
+        if (IS_ERR(dentry)) {
                 pr_err("fh_to_dentry %llx -- inode %p but ENOMEM\n",
                        fh->ino, inode);
                 iput(inode);
-                return ERR_PTR(-ENOMEM);
+                return dentry;
         }
         err = ceph_init_dentry(dentry);
 
@@ -115,7 +115,7 @@ static struct dentry *__fh_to_dentry(struct super_block *sb,
 static struct dentry *__cfh_to_dentry(struct super_block *sb,
                                       struct ceph_nfs_confh *cfh)
 {
-        struct ceph_mds_client *mdsc = &ceph_client(sb)->mdsc;
+        struct ceph_mds_client *mdsc = &ceph_sb_to_client(sb)->mdsc;
         struct inode *inode;
         struct dentry *dentry;
         struct ceph_vino vino;
@@ -149,11 +149,11 @@ static struct dentry *__cfh_to_dentry(struct super_block *sb,
         }
 
         dentry = d_obtain_alias(inode);
-        if (!dentry) {
+        if (IS_ERR(dentry)) {
                 pr_err("cfh_to_dentry %llx -- inode %p but ENOMEM\n",
                        cfh->ino, inode);
                 iput(inode);
-                return ERR_PTR(-ENOMEM);
+                return dentry;
         }
         err = ceph_init_dentry(dentry);
         if (err < 0) {
@@ -202,11 +202,11 @@ static struct dentry *ceph_fh_to_parent(struct super_block *sb,
                 return ERR_PTR(-ESTALE);
 
         dentry = d_obtain_alias(inode);
-        if (!dentry) {
+        if (IS_ERR(dentry)) {
                 pr_err("fh_to_parent %llx -- inode %p but ENOMEM\n",
                        cfh->ino, inode);
                 iput(inode);
-                return ERR_PTR(-ENOMEM);
+                return dentry;
         }
         err = ceph_init_dentry(dentry);
         if (err < 0) {
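All three export.c hunks fix the same misconception: d_obtain_alias() never returns NULL. It reports failure as an ERR_PTR-encoded pointer (for example ERR_PTR(-ENOMEM)), so the old if (!dentry) check could never fire and an encoded error could escape as if it were a valid dentry. The corrected check in isolation (the surrounding logging and cleanup are unchanged in the hunks above):

    dentry = d_obtain_alias(inode);
    if (IS_ERR(dentry))
            return dentry;  /* propagate the encoded error as-is */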
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 7d634938edc9..6512b6701b9e 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -317,16 +317,16 @@ void ceph_release_page_vector(struct page **pages, int num_pages)
 /*
  * allocate a vector new pages
  */
-static struct page **alloc_page_vector(int num_pages)
+struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags)
 {
         struct page **pages;
         int i;
 
-        pages = kmalloc(sizeof(*pages) * num_pages, GFP_NOFS);
+        pages = kmalloc(sizeof(*pages) * num_pages, flags);
         if (!pages)
                 return ERR_PTR(-ENOMEM);
         for (i = 0; i < num_pages; i++) {
-                pages[i] = alloc_page(GFP_NOFS);
+                pages[i] = __page_cache_alloc(flags);
                 if (pages[i] == NULL) {
                         ceph_release_page_vector(pages, i);
                         return ERR_PTR(-ENOMEM);
@@ -540,7 +540,7 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data,
          * in sequence.
          */
         } else {
-                pages = alloc_page_vector(num_pages);
+                pages = ceph_alloc_page_vector(num_pages, GFP_NOFS);
         }
         if (IS_ERR(pages))
                 return PTR_ERR(pages);
@@ -649,8 +649,8 @@ more:
                                     do_sync,
                                     ci->i_truncate_seq, ci->i_truncate_size,
                                     &mtime, false, 2);
-        if (IS_ERR(req))
-                return PTR_ERR(req);
+        if (!req)
+                return -ENOMEM;
 
         num_pages = calc_pages_for(pos, len);
 
@@ -668,7 +668,7 @@ more:
                 truncate_inode_pages_range(inode->i_mapping, pos,
                                            (pos+len) | (PAGE_CACHE_SIZE-1));
         } else {
-                pages = alloc_page_vector(num_pages);
+                pages = ceph_alloc_page_vector(num_pages, GFP_NOFS);
                 if (IS_ERR(pages)) {
                         ret = PTR_ERR(pages);
                         goto out;
@@ -809,7 +809,7 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov,
         struct file *file = iocb->ki_filp;
         struct inode *inode = file->f_dentry->d_inode;
         struct ceph_inode_info *ci = ceph_inode(inode);
-        struct ceph_osd_client *osdc = &ceph_client(inode->i_sb)->osdc;
+        struct ceph_osd_client *osdc = &ceph_sb_to_client(inode->i_sb)->osdc;
         loff_t endoff = pos + iov->iov_len;
         int got = 0;
         int ret, err;
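This is the "ceph: all allocation functions should get gfp_mask" commit in action: alloc_page_vector() becomes the exported ceph_alloc_page_vector() with the gfp mask chosen by the caller, and per-page allocation goes through __page_cache_alloc(flags). Callers on I/O paths keep GFP_NOFS so allocation cannot recurse back into the filesystem; the caller-side pattern, condensed from the hunks above:

    struct page **pages;

    pages = ceph_alloc_page_vector(calc_pages_for(pos, len), GFP_NOFS);
    if (IS_ERR(pages))
            return PTR_ERR(pages);  /* page-vector helpers still use ERR_PTR */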
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 85b4d2ffdeba..a81b8b662c7b 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -384,7 +384,7 @@ void ceph_destroy_inode(struct inode *inode)
          */
         if (ci->i_snap_realm) {
                 struct ceph_mds_client *mdsc =
-                        &ceph_client(ci->vfs_inode.i_sb)->mdsc;
+                        &ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
                 struct ceph_snap_realm *realm = ci->i_snap_realm;
 
                 dout(" dropping residual ref to snap realm %p\n", realm);
@@ -619,11 +619,12 @@ static int fill_inode(struct inode *inode,
                 memcpy(ci->i_xattrs.blob->vec.iov_base,
                        iinfo->xattr_data, iinfo->xattr_len);
                 ci->i_xattrs.version = le64_to_cpu(info->xattr_version);
+                xattr_blob = NULL;
         }
 
         inode->i_mapping->a_ops = &ceph_aops;
         inode->i_mapping->backing_dev_info =
-                &ceph_client(inode->i_sb)->backing_dev_info;
+                &ceph_sb_to_client(inode->i_sb)->backing_dev_info;
 
         switch (inode->i_mode & S_IFMT) {
         case S_IFIFO:
@@ -674,14 +675,15 @@ static int fill_inode(struct inode *inode,
                 /* set dir completion flag? */
                 if (ci->i_files == 0 && ci->i_subdirs == 0 &&
                     ceph_snap(inode) == CEPH_NOSNAP &&
-                    (le32_to_cpu(info->cap.caps) & CEPH_CAP_FILE_SHARED)) {
+                    (le32_to_cpu(info->cap.caps) & CEPH_CAP_FILE_SHARED) &&
+                    (ci->i_ceph_flags & CEPH_I_COMPLETE) == 0) {
                         dout(" marking %p complete (empty)\n", inode);
                         ci->i_ceph_flags |= CEPH_I_COMPLETE;
                         ci->i_max_offset = 2;
                 }
 
                 /* it may be better to set st_size in getattr instead? */
-                if (ceph_test_opt(ceph_client(inode->i_sb), RBYTES))
+                if (ceph_test_opt(ceph_sb_to_client(inode->i_sb), RBYTES))
                         inode->i_size = ci->i_rbytes;
                 break;
         default:
@@ -802,6 +804,37 @@ out_unlock:
 }
 
 /*
+ * Set dentry's directory position based on the current dir's max, and
+ * order it in d_subdirs, so that dcache_readdir behaves.
+ */
+static void ceph_set_dentry_offset(struct dentry *dn)
+{
+        struct dentry *dir = dn->d_parent;
+        struct inode *inode = dn->d_parent->d_inode;
+        struct ceph_dentry_info *di;
+
+        BUG_ON(!inode);
+
+        di = ceph_dentry(dn);
+
+        spin_lock(&inode->i_lock);
+        if ((ceph_inode(inode)->i_ceph_flags & CEPH_I_COMPLETE) == 0) {
+                spin_unlock(&inode->i_lock);
+                return;
+        }
+        di->offset = ceph_inode(inode)->i_max_offset++;
+        spin_unlock(&inode->i_lock);
+
+        spin_lock(&dcache_lock);
+        spin_lock(&dn->d_lock);
+        list_move_tail(&dir->d_subdirs, &dn->d_u.d_child);
+        dout("set_dentry_offset %p %lld (%p %p)\n", dn, di->offset,
+             dn->d_u.d_child.prev, dn->d_u.d_child.next);
+        spin_unlock(&dn->d_lock);
+        spin_unlock(&dcache_lock);
+}
+
+/*
  * splice a dentry to an inode.
  * caller must hold directory i_mutex for this to be safe.
  *
@@ -814,6 +847,8 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in,
 {
         struct dentry *realdn;
 
+        BUG_ON(dn->d_inode);
+
         /* dn must be unhashed */
         if (!d_unhashed(dn))
                 d_drop(dn);
@@ -835,44 +870,17 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in,
                 dn = realdn;
         } else {
                 BUG_ON(!ceph_dentry(dn));
-
                 dout("dn %p attached to %p ino %llx.%llx\n",
                      dn, dn->d_inode, ceph_vinop(dn->d_inode));
         }
         if ((!prehash || *prehash) && d_unhashed(dn))
                 d_rehash(dn);
+        ceph_set_dentry_offset(dn);
 out:
         return dn;
 }
 
 /*
- * Set dentry's directory position based on the current dir's max, and
- * order it in d_subdirs, so that dcache_readdir behaves.
- */
-static void ceph_set_dentry_offset(struct dentry *dn)
-{
-        struct dentry *dir = dn->d_parent;
-        struct inode *inode = dn->d_parent->d_inode;
-        struct ceph_dentry_info *di;
-
-        BUG_ON(!inode);
-
-        di = ceph_dentry(dn);
-
-        spin_lock(&inode->i_lock);
-        di->offset = ceph_inode(inode)->i_max_offset++;
-        spin_unlock(&inode->i_lock);
-
-        spin_lock(&dcache_lock);
-        spin_lock(&dn->d_lock);
-        list_move_tail(&dir->d_subdirs, &dn->d_u.d_child);
-        dout("set_dentry_offset %p %lld (%p %p)\n", dn, di->offset,
-             dn->d_u.d_child.prev, dn->d_u.d_child.next);
-        spin_unlock(&dn->d_lock);
-        spin_unlock(&dcache_lock);
-}
-
-/*
  * Incorporate results into the local cache.  This is either just
  * one inode, or a directory, dentry, and possibly linked-to inode (e.g.,
  * after a lookup).
@@ -933,14 +941,8 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
 
         if (!rinfo->head->is_target && !rinfo->head->is_dentry) {
                 dout("fill_trace reply is empty!\n");
-                if (rinfo->head->result == 0 && req->r_locked_dir) {
-                        struct ceph_inode_info *ci =
-                                ceph_inode(req->r_locked_dir);
-                        dout(" clearing %p complete (empty trace)\n",
-                             req->r_locked_dir);
-                        ci->i_ceph_flags &= ~CEPH_I_COMPLETE;
-                        ci->i_release_count++;
-                }
+                if (rinfo->head->result == 0 && req->r_locked_dir)
+                        ceph_invalidate_dir_request(req);
                 return 0;
         }
 
@@ -1011,13 +1013,18 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
                              req->r_old_dentry->d_name.len,
                              req->r_old_dentry->d_name.name,
                              dn, dn->d_name.len, dn->d_name.name);
+
                         /* ensure target dentry is invalidated, despite
                            rehashing bug in vfs_rename_dir */
-                        dn->d_time = jiffies;
-                        ceph_dentry(dn)->lease_shared_gen = 0;
+                        ceph_invalidate_dentry_lease(dn);
+
                         /* take overwritten dentry's readdir offset */
+                        dout("dn %p gets %p offset %lld (old offset %lld)\n",
+                             req->r_old_dentry, dn, ceph_dentry(dn)->offset,
+                             ceph_dentry(req->r_old_dentry)->offset);
                         ceph_dentry(req->r_old_dentry)->offset =
                                 ceph_dentry(dn)->offset;
+
                         dn = req->r_old_dentry;  /* use old_dentry */
                         in = dn->d_inode;
                 }
@@ -1059,7 +1066,6 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
                         goto done;
                 }
                 req->r_dentry = dn;  /* may have spliced */
-                ceph_set_dentry_offset(dn);
                 igrab(in);
         } else if (ceph_ino(in) == vino.ino &&
                    ceph_snap(in) == vino.snap) {
@@ -1102,7 +1108,6 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
                         err = PTR_ERR(dn);
                         goto done;
                 }
-                ceph_set_dentry_offset(dn);
                 req->r_dentry = dn;  /* may have spliced */
                 igrab(in);
                 rinfo->head->is_dentry = 1;  /* fool notrace handlers */
@@ -1429,7 +1434,7 @@ void ceph_queue_vmtruncate(struct inode *inode)
 {
         struct ceph_inode_info *ci = ceph_inode(inode);
 
-        if (queue_work(ceph_client(inode->i_sb)->trunc_wq,
+        if (queue_work(ceph_sb_to_client(inode->i_sb)->trunc_wq,
                        &ci->i_vmtruncate_work)) {
                 dout("ceph_queue_vmtruncate %p\n", inode);
                 igrab(inode);
@@ -1518,7 +1523,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
         struct inode *parent_inode = dentry->d_parent->d_inode;
         const unsigned int ia_valid = attr->ia_valid;
         struct ceph_mds_request *req;
-        struct ceph_mds_client *mdsc = &ceph_client(dentry->d_sb)->mdsc;
+        struct ceph_mds_client *mdsc = &ceph_sb_to_client(dentry->d_sb)->mdsc;
         int issued;
         int release = 0, dirtied = 0;
         int mask = 0;
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c index 8a5bcae62846..d085f07756b4 100644 --- a/fs/ceph/ioctl.c +++ b/fs/ceph/ioctl.c | |||
| @@ -98,7 +98,7 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg) | |||
| 98 | struct ceph_ioctl_dataloc dl; | 98 | struct ceph_ioctl_dataloc dl; |
| 99 | struct inode *inode = file->f_dentry->d_inode; | 99 | struct inode *inode = file->f_dentry->d_inode; |
| 100 | struct ceph_inode_info *ci = ceph_inode(inode); | 100 | struct ceph_inode_info *ci = ceph_inode(inode); |
| 101 | struct ceph_osd_client *osdc = &ceph_client(inode->i_sb)->osdc; | 101 | struct ceph_osd_client *osdc = &ceph_sb_to_client(inode->i_sb)->osdc; |
| 102 | u64 len = 1, olen; | 102 | u64 len = 1, olen; |
| 103 | u64 tmp; | 103 | u64 tmp; |
| 104 | struct ceph_object_layout ol; | 104 | struct ceph_object_layout ol; |
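Both hunks above swap the old `ceph_client(sb)` accessor for `ceph_sb_to_client(sb)`. The renamed helper is presumably still a thin wrapper around the superblock's private pointer; a sketch of what such an accessor looks like (the real definition lives in fs/ceph/super.h, which this excerpt does not show):

```c
/* Sketch only -- the rename avoids a name that reads like a
 * constructor and collides with the struct tag. */
static inline struct ceph_client *ceph_sb_to_client(struct super_block *sb)
{
	return (struct ceph_client *)sb->s_fs_info;
}
```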
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 24561a557e01..885aa5710cfd 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c | |||
| @@ -40,7 +40,7 @@ | |||
| 40 | static void __wake_requests(struct ceph_mds_client *mdsc, | 40 | static void __wake_requests(struct ceph_mds_client *mdsc, |
| 41 | struct list_head *head); | 41 | struct list_head *head); |
| 42 | 42 | ||
| 43 | const static struct ceph_connection_operations mds_con_ops; | 43 | static const struct ceph_connection_operations mds_con_ops; |
| 44 | 44 | ||
| 45 | 45 | ||
| 46 | /* | 46 | /* |
| @@ -665,10 +665,10 @@ static struct ceph_msg *create_session_msg(u32 op, u64 seq) | |||
| 665 | struct ceph_msg *msg; | 665 | struct ceph_msg *msg; |
| 666 | struct ceph_mds_session_head *h; | 666 | struct ceph_mds_session_head *h; |
| 667 | 667 | ||
| 668 | msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h), 0, 0, NULL); | 668 | msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h), GFP_NOFS); |
| 669 | if (IS_ERR(msg)) { | 669 | if (!msg) { |
| 670 | pr_err("create_session_msg ENOMEM creating msg\n"); | 670 | pr_err("create_session_msg ENOMEM creating msg\n"); |
| 671 | return ERR_PTR(PTR_ERR(msg)); | 671 | return NULL; |
| 672 | } | 672 | } |
| 673 | h = msg->front.iov_base; | 673 | h = msg->front.iov_base; |
| 674 | h->op = cpu_to_le32(op); | 674 | h->op = cpu_to_le32(op); |
| @@ -687,7 +687,6 @@ static int __open_session(struct ceph_mds_client *mdsc, | |||
| 687 | struct ceph_msg *msg; | 687 | struct ceph_msg *msg; |
| 688 | int mstate; | 688 | int mstate; |
| 689 | int mds = session->s_mds; | 689 | int mds = session->s_mds; |
| 690 | int err = 0; | ||
| 691 | 690 | ||
| 692 | /* wait for mds to go active? */ | 691 | /* wait for mds to go active? */ |
| 693 | mstate = ceph_mdsmap_get_state(mdsc->mdsmap, mds); | 692 | mstate = ceph_mdsmap_get_state(mdsc->mdsmap, mds); |
| @@ -698,13 +697,9 @@ static int __open_session(struct ceph_mds_client *mdsc, | |||
| 698 | 697 | ||
| 699 | /* send connect message */ | 698 | /* send connect message */ |
| 700 | msg = create_session_msg(CEPH_SESSION_REQUEST_OPEN, session->s_seq); | 699 | msg = create_session_msg(CEPH_SESSION_REQUEST_OPEN, session->s_seq); |
| 701 | if (IS_ERR(msg)) { | 700 | if (!msg) |
| 702 | err = PTR_ERR(msg); | 701 | return -ENOMEM; |
| 703 | goto out; | ||
| 704 | } | ||
| 705 | ceph_con_send(&session->s_con, msg); | 702 | ceph_con_send(&session->s_con, msg); |
| 706 | |||
| 707 | out: | ||
| 708 | return 0; | 703 | return 0; |
| 709 | } | 704 | } |
| 710 | 705 | ||
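The hunks above are part of a tree-wide convention change: `ceph_msg_new()` and its wrappers such as `create_session_msg()` now return NULL on failure instead of an `ERR_PTR`, since allocation is the only way they can fail. Callers shrink accordingly; a before/after sketch of the pattern:

```c
/* Before: ERR_PTR convention, with an extra unwind label. */
msg = create_session_msg(CEPH_SESSION_REQUEST_OPEN, session->s_seq);
if (IS_ERR(msg)) {
	err = PTR_ERR(msg);
	goto out;			/* unwind */
}

/* After: NULL means -ENOMEM, nothing else to decode. */
msg = create_session_msg(CEPH_SESSION_REQUEST_OPEN, session->s_seq);
if (!msg)
	return -ENOMEM;
```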
| @@ -804,12 +799,49 @@ out: | |||
| 804 | } | 799 | } |
| 805 | 800 | ||
| 806 | static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, | 801 | static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, |
| 807 | void *arg) | 802 | void *arg) |
| 808 | { | 803 | { |
| 809 | struct ceph_inode_info *ci = ceph_inode(inode); | 804 | struct ceph_inode_info *ci = ceph_inode(inode); |
| 805 | int drop = 0; | ||
| 806 | |||
| 810 | dout("removing cap %p, ci is %p, inode is %p\n", | 807 | dout("removing cap %p, ci is %p, inode is %p\n", |
| 811 | cap, ci, &ci->vfs_inode); | 808 | cap, ci, &ci->vfs_inode); |
| 812 | ceph_remove_cap(cap); | 809 | spin_lock(&inode->i_lock); |
| 810 | __ceph_remove_cap(cap); | ||
| 811 | if (!__ceph_is_any_real_caps(ci)) { | ||
| 812 | struct ceph_mds_client *mdsc = | ||
| 813 | &ceph_sb_to_client(inode->i_sb)->mdsc; | ||
| 814 | |||
| 815 | spin_lock(&mdsc->cap_dirty_lock); | ||
| 816 | if (!list_empty(&ci->i_dirty_item)) { | ||
| 817 | pr_info(" dropping dirty %s state for %p %lld\n", | ||
| 818 | ceph_cap_string(ci->i_dirty_caps), | ||
| 819 | inode, ceph_ino(inode)); | ||
| 820 | ci->i_dirty_caps = 0; | ||
| 821 | list_del_init(&ci->i_dirty_item); | ||
| 822 | drop = 1; | ||
| 823 | } | ||
| 824 | if (!list_empty(&ci->i_flushing_item)) { | ||
| 825 | pr_info(" dropping dirty+flushing %s state for %p %lld\n", | ||
| 826 | ceph_cap_string(ci->i_flushing_caps), | ||
| 827 | inode, ceph_ino(inode)); | ||
| 828 | ci->i_flushing_caps = 0; | ||
| 829 | list_del_init(&ci->i_flushing_item); | ||
| 830 | mdsc->num_cap_flushing--; | ||
| 831 | drop = 1; | ||
| 832 | } | ||
| 833 | if (drop && ci->i_wrbuffer_ref) { | ||
| 834 | pr_info(" dropping dirty data for %p %lld\n", | ||
| 835 | inode, ceph_ino(inode)); | ||
| 836 | ci->i_wrbuffer_ref = 0; | ||
| 837 | ci->i_wrbuffer_ref_head = 0; | ||
| 838 | drop++; | ||
| 839 | } | ||
| 840 | spin_unlock(&mdsc->cap_dirty_lock); | ||
| 841 | } | ||
| 842 | spin_unlock(&inode->i_lock); | ||
| 843 | while (drop--) | ||
| 844 | iput(inode); | ||
| 813 | return 0; | 845 | return 0; |
| 814 | } | 846 | } |
| 815 | 847 | ||
| @@ -821,6 +853,7 @@ static void remove_session_caps(struct ceph_mds_session *session) | |||
| 821 | dout("remove_session_caps on %p\n", session); | 853 | dout("remove_session_caps on %p\n", session); |
| 822 | iterate_session_caps(session, remove_session_caps_cb, NULL); | 854 | iterate_session_caps(session, remove_session_caps_cb, NULL); |
| 823 | BUG_ON(session->s_nr_caps > 0); | 855 | BUG_ON(session->s_nr_caps > 0); |
| 856 | BUG_ON(!list_empty(&session->s_cap_flushing)); | ||
| 824 | cleanup_cap_releases(session); | 857 | cleanup_cap_releases(session); |
| 825 | } | 858 | } |
| 826 | 859 | ||
| @@ -883,8 +916,8 @@ static int send_renew_caps(struct ceph_mds_client *mdsc, | |||
| 883 | ceph_mds_state_name(state)); | 916 | ceph_mds_state_name(state)); |
| 884 | msg = create_session_msg(CEPH_SESSION_REQUEST_RENEWCAPS, | 917 | msg = create_session_msg(CEPH_SESSION_REQUEST_RENEWCAPS, |
| 885 | ++session->s_renew_seq); | 918 | ++session->s_renew_seq); |
| 886 | if (IS_ERR(msg)) | 919 | if (!msg) |
| 887 | return PTR_ERR(msg); | 920 | return -ENOMEM; |
| 888 | ceph_con_send(&session->s_con, msg); | 921 | ceph_con_send(&session->s_con, msg); |
| 889 | return 0; | 922 | return 0; |
| 890 | } | 923 | } |
| @@ -931,17 +964,15 @@ static int request_close_session(struct ceph_mds_client *mdsc, | |||
| 931 | struct ceph_mds_session *session) | 964 | struct ceph_mds_session *session) |
| 932 | { | 965 | { |
| 933 | struct ceph_msg *msg; | 966 | struct ceph_msg *msg; |
| 934 | int err = 0; | ||
| 935 | 967 | ||
| 936 | dout("request_close_session mds%d state %s seq %lld\n", | 968 | dout("request_close_session mds%d state %s seq %lld\n", |
| 937 | session->s_mds, session_state_name(session->s_state), | 969 | session->s_mds, session_state_name(session->s_state), |
| 938 | session->s_seq); | 970 | session->s_seq); |
| 939 | msg = create_session_msg(CEPH_SESSION_REQUEST_CLOSE, session->s_seq); | 971 | msg = create_session_msg(CEPH_SESSION_REQUEST_CLOSE, session->s_seq); |
| 940 | if (IS_ERR(msg)) | 972 | if (!msg) |
| 941 | err = PTR_ERR(msg); | 973 | return -ENOMEM; |
| 942 | else | 974 | ceph_con_send(&session->s_con, msg); |
| 943 | ceph_con_send(&session->s_con, msg); | 975 | return 0; |
| 944 | return err; | ||
| 945 | } | 976 | } |
| 946 | 977 | ||
| 947 | /* | 978 | /* |
| @@ -1059,7 +1090,7 @@ static int add_cap_releases(struct ceph_mds_client *mdsc, | |||
| 1059 | while (session->s_num_cap_releases < session->s_nr_caps + extra) { | 1090 | while (session->s_num_cap_releases < session->s_nr_caps + extra) { |
| 1060 | spin_unlock(&session->s_cap_lock); | 1091 | spin_unlock(&session->s_cap_lock); |
| 1061 | msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPRELEASE, PAGE_CACHE_SIZE, | 1092 | msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPRELEASE, PAGE_CACHE_SIZE, |
| 1062 | 0, 0, NULL); | 1093 | GFP_NOFS); |
| 1063 | if (!msg) | 1094 | if (!msg) |
| 1064 | goto out_unlocked; | 1095 | goto out_unlocked; |
| 1065 | dout("add_cap_releases %p msg %p now %d\n", session, msg, | 1096 | dout("add_cap_releases %p msg %p now %d\n", session, msg, |
| @@ -1151,10 +1182,8 @@ static void send_cap_releases(struct ceph_mds_client *mdsc, | |||
| 1151 | struct ceph_msg *msg; | 1182 | struct ceph_msg *msg; |
| 1152 | 1183 | ||
| 1153 | dout("send_cap_releases mds%d\n", session->s_mds); | 1184 | dout("send_cap_releases mds%d\n", session->s_mds); |
| 1154 | while (1) { | 1185 | spin_lock(&session->s_cap_lock); |
| 1155 | spin_lock(&session->s_cap_lock); | 1186 | while (!list_empty(&session->s_cap_releases_done)) { |
| 1156 | if (list_empty(&session->s_cap_releases_done)) | ||
| 1157 | break; | ||
| 1158 | msg = list_first_entry(&session->s_cap_releases_done, | 1187 | msg = list_first_entry(&session->s_cap_releases_done, |
| 1159 | struct ceph_msg, list_head); | 1188 | struct ceph_msg, list_head); |
| 1160 | list_del_init(&msg->list_head); | 1189 | list_del_init(&msg->list_head); |
| @@ -1162,10 +1191,49 @@ static void send_cap_releases(struct ceph_mds_client *mdsc, | |||
| 1162 | msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); | 1191 | msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); |
| 1163 | dout("send_cap_releases mds%d %p\n", session->s_mds, msg); | 1192 | dout("send_cap_releases mds%d %p\n", session->s_mds, msg); |
| 1164 | ceph_con_send(&session->s_con, msg); | 1193 | ceph_con_send(&session->s_con, msg); |
| 1194 | spin_lock(&session->s_cap_lock); | ||
| 1165 | } | 1195 | } |
| 1166 | spin_unlock(&session->s_cap_lock); | 1196 | spin_unlock(&session->s_cap_lock); |
| 1167 | } | 1197 | } |
| 1168 | 1198 | ||
| 1199 | static void discard_cap_releases(struct ceph_mds_client *mdsc, | ||
| 1200 | struct ceph_mds_session *session) | ||
| 1201 | { | ||
| 1202 | struct ceph_msg *msg; | ||
| 1203 | struct ceph_mds_cap_release *head; | ||
| 1204 | unsigned num; | ||
| 1205 | |||
| 1206 | dout("discard_cap_releases mds%d\n", session->s_mds); | ||
| 1207 | spin_lock(&session->s_cap_lock); | ||
| 1208 | |||
| 1209 | /* zero out the in-progress message */ | ||
| 1210 | msg = list_first_entry(&session->s_cap_releases, | ||
| 1211 | struct ceph_msg, list_head); | ||
| 1212 | head = msg->front.iov_base; | ||
| 1213 | num = le32_to_cpu(head->num); | ||
| 1214 | dout("discard_cap_releases mds%d %p %u\n", session->s_mds, msg, num); | ||
| 1215 | head->num = cpu_to_le32(0); | ||
| 1216 | session->s_num_cap_releases += num; | ||
| 1217 | |||
| 1218 | /* requeue completed messages */ | ||
| 1219 | while (!list_empty(&session->s_cap_releases_done)) { | ||
| 1220 | msg = list_first_entry(&session->s_cap_releases_done, | ||
| 1221 | struct ceph_msg, list_head); | ||
| 1222 | list_del_init(&msg->list_head); | ||
| 1223 | |||
| 1224 | head = msg->front.iov_base; | ||
| 1225 | num = le32_to_cpu(head->num); | ||
| 1226 | dout("discard_cap_releases mds%d %p %u\n", session->s_mds, msg, | ||
| 1227 | num); | ||
| 1228 | session->s_num_cap_releases += num; | ||
| 1229 | head->num = cpu_to_le32(0); | ||
| 1230 | msg->front.iov_len = sizeof(*head); | ||
| 1231 | list_add(&msg->list_head, &session->s_cap_releases); | ||
| 1232 | } | ||
| 1233 | |||
| 1234 | spin_unlock(&session->s_cap_lock); | ||
| 1235 | } | ||
| 1236 | |||
| 1169 | /* | 1237 | /* |
| 1170 | * requests | 1238 | * requests |
| 1171 | */ | 1239 | */ |
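`send_cap_releases()` is restructured from a lock-per-iteration loop into a single locked scope that drops `s_cap_lock` only around `ceph_con_send()` (which may sleep) and re-takes it before retesting the list; the new `discard_cap_releases()` reuses the same lock to zero the in-progress release message and requeue completed ones, since the cap state they described will be re-established on reconnect. The unlock-around-sleep shape:

```c
	spin_lock(&session->s_cap_lock);
	while (!list_empty(&session->s_cap_releases_done)) {
		msg = list_first_entry(&session->s_cap_releases_done,
				       struct ceph_msg, list_head);
		list_del_init(&msg->list_head);
		spin_unlock(&session->s_cap_lock);	/* send may sleep */
		ceph_con_send(&session->s_con, msg);
		spin_lock(&session->s_cap_lock);	/* retest under lock */
	}
	spin_unlock(&session->s_cap_lock);
```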
| @@ -1181,6 +1249,7 @@ ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode) | |||
| 1181 | if (!req) | 1249 | if (!req) |
| 1182 | return ERR_PTR(-ENOMEM); | 1250 | return ERR_PTR(-ENOMEM); |
| 1183 | 1251 | ||
| 1252 | mutex_init(&req->r_fill_mutex); | ||
| 1184 | req->r_started = jiffies; | 1253 | req->r_started = jiffies; |
| 1185 | req->r_resend_mds = -1; | 1254 | req->r_resend_mds = -1; |
| 1186 | INIT_LIST_HEAD(&req->r_unsafe_dir_item); | 1255 | INIT_LIST_HEAD(&req->r_unsafe_dir_item); |
| @@ -1251,7 +1320,7 @@ retry: | |||
| 1251 | len += 1 + temp->d_name.len; | 1320 | len += 1 + temp->d_name.len; |
| 1252 | temp = temp->d_parent; | 1321 | temp = temp->d_parent; |
| 1253 | if (temp == NULL) { | 1322 | if (temp == NULL) { |
| 1254 | pr_err("build_path_dentry corrupt dentry %p\n", dentry); | 1323 | pr_err("build_path corrupt dentry %p\n", dentry); |
| 1255 | return ERR_PTR(-EINVAL); | 1324 | return ERR_PTR(-EINVAL); |
| 1256 | } | 1325 | } |
| 1257 | } | 1326 | } |
| @@ -1267,7 +1336,7 @@ retry: | |||
| 1267 | struct inode *inode = temp->d_inode; | 1336 | struct inode *inode = temp->d_inode; |
| 1268 | 1337 | ||
| 1269 | if (inode && ceph_snap(inode) == CEPH_SNAPDIR) { | 1338 | if (inode && ceph_snap(inode) == CEPH_SNAPDIR) { |
| 1270 | dout("build_path_dentry path+%d: %p SNAPDIR\n", | 1339 | dout("build_path path+%d: %p SNAPDIR\n", |
| 1271 | pos, temp); | 1340 | pos, temp); |
| 1272 | } else if (stop_on_nosnap && inode && | 1341 | } else if (stop_on_nosnap && inode && |
| 1273 | ceph_snap(inode) == CEPH_NOSNAP) { | 1342 | ceph_snap(inode) == CEPH_NOSNAP) { |
| @@ -1278,20 +1347,18 @@ retry: | |||
| 1278 | break; | 1347 | break; |
| 1279 | strncpy(path + pos, temp->d_name.name, | 1348 | strncpy(path + pos, temp->d_name.name, |
| 1280 | temp->d_name.len); | 1349 | temp->d_name.len); |
| 1281 | dout("build_path_dentry path+%d: %p '%.*s'\n", | ||
| 1282 | pos, temp, temp->d_name.len, path + pos); | ||
| 1283 | } | 1350 | } |
| 1284 | if (pos) | 1351 | if (pos) |
| 1285 | path[--pos] = '/'; | 1352 | path[--pos] = '/'; |
| 1286 | temp = temp->d_parent; | 1353 | temp = temp->d_parent; |
| 1287 | if (temp == NULL) { | 1354 | if (temp == NULL) { |
| 1288 | pr_err("build_path_dentry corrupt dentry\n"); | 1355 | pr_err("build_path corrupt dentry\n"); |
| 1289 | kfree(path); | 1356 | kfree(path); |
| 1290 | return ERR_PTR(-EINVAL); | 1357 | return ERR_PTR(-EINVAL); |
| 1291 | } | 1358 | } |
| 1292 | } | 1359 | } |
| 1293 | if (pos != 0) { | 1360 | if (pos != 0) { |
| 1294 | pr_err("build_path_dentry did not end path lookup where " | 1361 | pr_err("build_path did not end path lookup where " |
| 1295 | "expected, namelen is %d, pos is %d\n", len, pos); | 1362 | "expected, namelen is %d, pos is %d\n", len, pos); |
| 1296 | /* presumably this is only possible if racing with a | 1363 | /* presumably this is only possible if racing with a |
| 1297 | rename of one of the parent directories (we can not | 1364 | rename of one of the parent directories (we can not |
| @@ -1303,7 +1370,7 @@ retry: | |||
| 1303 | 1370 | ||
| 1304 | *base = ceph_ino(temp->d_inode); | 1371 | *base = ceph_ino(temp->d_inode); |
| 1305 | *plen = len; | 1372 | *plen = len; |
| 1306 | dout("build_path_dentry on %p %d built %llx '%.*s'\n", | 1373 | dout("build_path on %p %d built %llx '%.*s'\n", |
| 1307 | dentry, atomic_read(&dentry->d_count), *base, len, path); | 1374 | dentry, atomic_read(&dentry->d_count), *base, len, path); |
| 1308 | return path; | 1375 | return path; |
| 1309 | } | 1376 | } |
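The `build_path_dentry` → `build_path` rename also drops a noisy per-component dout. The function's approach (walk `d_parent` from leaf toward root, writing components into the buffer back-to-front so nothing ever has to be shifted) is easy to see in a standalone sketch; the names and fixed component list below are illustrative only:

```c
/* Userspace sketch of the build_path idea: fill the buffer from the
 * end so each component is prepended without moving earlier bytes. */
#include <stdio.h>
#include <string.h>

static void build_path(char *buf, size_t len,
		       const char *const comps[], int n)
{
	size_t pos = len;

	buf[--pos] = '\0';
	while (n--) {			/* leaf first, like d_parent walk */
		size_t l = strlen(comps[n]);

		pos -= l;
		memcpy(buf + pos, comps[n], l);
		if (n)
			buf[--pos] = '/';
	}
	printf("%s\n", buf + pos);	/* prints: a/b/c */
}

int main(void)
{
	const char *const comps[] = { "a", "b", "c" };
	char buf[32];

	build_path(buf, sizeof(buf), comps, 3);
	return 0;
}
```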
| @@ -1426,9 +1493,11 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, | |||
| 1426 | if (req->r_old_dentry_drop) | 1493 | if (req->r_old_dentry_drop) |
| 1427 | len += req->r_old_dentry->d_name.len; | 1494 | len += req->r_old_dentry->d_name.len; |
| 1428 | 1495 | ||
| 1429 | msg = ceph_msg_new(CEPH_MSG_CLIENT_REQUEST, len, 0, 0, NULL); | 1496 | msg = ceph_msg_new(CEPH_MSG_CLIENT_REQUEST, len, GFP_NOFS); |
| 1430 | if (IS_ERR(msg)) | 1497 | if (!msg) { |
| 1498 | msg = ERR_PTR(-ENOMEM); | ||
| 1431 | goto out_free2; | 1499 | goto out_free2; |
| 1500 | } | ||
| 1432 | 1501 | ||
| 1433 | msg->hdr.tid = cpu_to_le64(req->r_tid); | 1502 | msg->hdr.tid = cpu_to_le64(req->r_tid); |
| 1434 | 1503 | ||
| @@ -1517,9 +1586,9 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc, | |||
| 1517 | } | 1586 | } |
| 1518 | msg = create_request_message(mdsc, req, mds); | 1587 | msg = create_request_message(mdsc, req, mds); |
| 1519 | if (IS_ERR(msg)) { | 1588 | if (IS_ERR(msg)) { |
| 1520 | req->r_reply = ERR_PTR(PTR_ERR(msg)); | 1589 | req->r_err = PTR_ERR(msg); |
| 1521 | complete_request(mdsc, req); | 1590 | complete_request(mdsc, req); |
| 1522 | return -PTR_ERR(msg); | 1591 | return PTR_ERR(msg); |
| 1523 | } | 1592 | } |
| 1524 | req->r_request = msg; | 1593 | req->r_request = msg; |
| 1525 | 1594 | ||
| @@ -1552,7 +1621,7 @@ static int __do_request(struct ceph_mds_client *mdsc, | |||
| 1552 | int mds = -1; | 1621 | int mds = -1; |
| 1553 | int err = -EAGAIN; | 1622 | int err = -EAGAIN; |
| 1554 | 1623 | ||
| 1555 | if (req->r_reply) | 1624 | if (req->r_err || req->r_got_result) |
| 1556 | goto out; | 1625 | goto out; |
| 1557 | 1626 | ||
| 1558 | if (req->r_timeout && | 1627 | if (req->r_timeout && |
| @@ -1609,7 +1678,7 @@ out: | |||
| 1609 | return err; | 1678 | return err; |
| 1610 | 1679 | ||
| 1611 | finish: | 1680 | finish: |
| 1612 | req->r_reply = ERR_PTR(err); | 1681 | req->r_err = err; |
| 1613 | complete_request(mdsc, req); | 1682 | complete_request(mdsc, req); |
| 1614 | goto out; | 1683 | goto out; |
| 1615 | } | 1684 | } |
| @@ -1630,10 +1699,9 @@ static void __wake_requests(struct ceph_mds_client *mdsc, | |||
| 1630 | 1699 | ||
| 1631 | /* | 1700 | /* |
| 1632 | * Wake up threads with requests pending for @mds, so that they can | 1701 | * Wake up threads with requests pending for @mds, so that they can |
| 1633 | * resubmit their requests to a possibly different mds. If @all is set, | 1702 | * resubmit their requests to a possibly different mds. |
| 1634 | * wake up if their requests has been forwarded to @mds, too. | ||
| 1635 | */ | 1703 | */ |
| 1636 | static void kick_requests(struct ceph_mds_client *mdsc, int mds, int all) | 1704 | static void kick_requests(struct ceph_mds_client *mdsc, int mds) |
| 1637 | { | 1705 | { |
| 1638 | struct ceph_mds_request *req; | 1706 | struct ceph_mds_request *req; |
| 1639 | struct rb_node *p; | 1707 | struct rb_node *p; |
| @@ -1689,64 +1757,78 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc, | |||
| 1689 | __register_request(mdsc, req, dir); | 1757 | __register_request(mdsc, req, dir); |
| 1690 | __do_request(mdsc, req); | 1758 | __do_request(mdsc, req); |
| 1691 | 1759 | ||
| 1692 | /* wait */ | 1760 | if (req->r_err) { |
| 1693 | if (!req->r_reply) { | 1761 | err = req->r_err; |
| 1694 | mutex_unlock(&mdsc->mutex); | 1762 | __unregister_request(mdsc, req); |
| 1695 | if (req->r_timeout) { | 1763 | dout("do_request early error %d\n", err); |
| 1696 | err = (long)wait_for_completion_interruptible_timeout( | 1764 | goto out; |
| 1697 | &req->r_completion, req->r_timeout); | ||
| 1698 | if (err == 0) | ||
| 1699 | req->r_reply = ERR_PTR(-EIO); | ||
| 1700 | else if (err < 0) | ||
| 1701 | req->r_reply = ERR_PTR(err); | ||
| 1702 | } else { | ||
| 1703 | err = wait_for_completion_interruptible( | ||
| 1704 | &req->r_completion); | ||
| 1705 | if (err) | ||
| 1706 | req->r_reply = ERR_PTR(err); | ||
| 1707 | } | ||
| 1708 | mutex_lock(&mdsc->mutex); | ||
| 1709 | } | 1765 | } |
| 1710 | 1766 | ||
| 1711 | if (IS_ERR(req->r_reply)) { | 1767 | /* wait */ |
| 1712 | err = PTR_ERR(req->r_reply); | 1768 | mutex_unlock(&mdsc->mutex); |
| 1713 | req->r_reply = NULL; | 1769 | dout("do_request waiting\n"); |
| 1770 | if (req->r_timeout) { | ||
| 1771 | err = (long)wait_for_completion_interruptible_timeout( | ||
| 1772 | &req->r_completion, req->r_timeout); | ||
| 1773 | if (err == 0) | ||
| 1774 | err = -EIO; | ||
| 1775 | } else { | ||
| 1776 | err = wait_for_completion_interruptible(&req->r_completion); | ||
| 1777 | } | ||
| 1778 | dout("do_request waited, got %d\n", err); | ||
| 1779 | mutex_lock(&mdsc->mutex); | ||
| 1714 | 1780 | ||
| 1715 | if (err == -ERESTARTSYS) { | 1781 | /* only abort if we didn't race with a real reply */ |
| 1716 | /* aborted */ | 1782 | if (req->r_got_result) { |
| 1717 | req->r_aborted = true; | 1783 | err = le32_to_cpu(req->r_reply_info.head->result); |
| 1784 | } else if (err < 0) { | ||
| 1785 | dout("aborted request %lld with %d\n", req->r_tid, err); | ||
| 1718 | 1786 | ||
| 1719 | if (req->r_locked_dir && | 1787 | /* |
| 1720 | (req->r_op & CEPH_MDS_OP_WRITE)) { | 1788 | * ensure we aren't running concurrently with |
| 1721 | struct ceph_inode_info *ci = | 1789 | * ceph_fill_trace or ceph_readdir_prepopulate, which |
| 1722 | ceph_inode(req->r_locked_dir); | 1790 | * rely on locks (dir mutex) held by our caller. |
| 1791 | */ | ||
| 1792 | mutex_lock(&req->r_fill_mutex); | ||
| 1793 | req->r_err = err; | ||
| 1794 | req->r_aborted = true; | ||
| 1795 | mutex_unlock(&req->r_fill_mutex); | ||
| 1723 | 1796 | ||
| 1724 | dout("aborted, clearing I_COMPLETE on %p\n", | 1797 | if (req->r_locked_dir && |
| 1725 | req->r_locked_dir); | 1798 | (req->r_op & CEPH_MDS_OP_WRITE)) |
| 1726 | spin_lock(&req->r_locked_dir->i_lock); | 1799 | ceph_invalidate_dir_request(req); |
| 1727 | ci->i_ceph_flags &= ~CEPH_I_COMPLETE; | ||
| 1728 | ci->i_release_count++; | ||
| 1729 | spin_unlock(&req->r_locked_dir->i_lock); | ||
| 1730 | } | ||
| 1731 | } else { | ||
| 1732 | /* clean up this request */ | ||
| 1733 | __unregister_request(mdsc, req); | ||
| 1734 | if (!list_empty(&req->r_unsafe_item)) | ||
| 1735 | list_del_init(&req->r_unsafe_item); | ||
| 1736 | complete(&req->r_safe_completion); | ||
| 1737 | } | ||
| 1738 | } else if (req->r_err) { | ||
| 1739 | err = req->r_err; | ||
| 1740 | } else { | 1800 | } else { |
| 1741 | err = le32_to_cpu(req->r_reply_info.head->result); | 1801 | err = req->r_err; |
| 1742 | } | 1802 | } |
| 1743 | mutex_unlock(&mdsc->mutex); | ||
| 1744 | 1803 | ||
| 1804 | out: | ||
| 1805 | mutex_unlock(&mdsc->mutex); | ||
| 1745 | dout("do_request %p done, result %d\n", req, err); | 1806 | dout("do_request %p done, result %d\n", req, err); |
| 1746 | return err; | 1807 | return err; |
| 1747 | } | 1808 | } |
| 1748 | 1809 | ||
| 1749 | /* | 1810 | /* |
| 1811 | * Invalidate dir I_COMPLETE, dentry lease state on an aborted MDS | ||
| 1812 | * namespace request. | ||
| 1813 | */ | ||
| 1814 | void ceph_invalidate_dir_request(struct ceph_mds_request *req) | ||
| 1815 | { | ||
| 1816 | struct inode *inode = req->r_locked_dir; | ||
| 1817 | struct ceph_inode_info *ci = ceph_inode(inode); | ||
| 1818 | |||
| 1819 | dout("invalidate_dir_request %p (I_COMPLETE, lease(s))\n", inode); | ||
| 1820 | spin_lock(&inode->i_lock); | ||
| 1821 | ci->i_ceph_flags &= ~CEPH_I_COMPLETE; | ||
| 1822 | ci->i_release_count++; | ||
| 1823 | spin_unlock(&inode->i_lock); | ||
| 1824 | |||
| 1825 | if (req->r_dentry) | ||
| 1826 | ceph_invalidate_dentry_lease(req->r_dentry); | ||
| 1827 | if (req->r_old_dentry) | ||
| 1828 | ceph_invalidate_dentry_lease(req->r_old_dentry); | ||
| 1829 | } | ||
| 1830 | |||
| 1831 | /* | ||
| 1750 | * Handle mds reply. | 1832 | * Handle mds reply. |
| 1751 | * | 1833 | * |
| 1752 | * We take the session mutex and parse and process the reply immediately. | 1834 | * We take the session mutex and parse and process the reply immediately. |
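`ceph_mdsc_do_request()` no longer parks its outcome in an `r_reply` ERR_PTR. After the wait, the caller distinguishes three cases: a real reply raced in (`r_got_result`), the wait itself failed (interrupt or timeout, so the request is marked aborted under `r_fill_mutex` to fence out a concurrent `ceph_fill_trace()`), or an early error was recorded in `r_err`. Condensed from the hunk above:

```c
	err = wait_for_completion_interruptible(&req->r_completion);
	mutex_lock(&mdsc->mutex);
	if (req->r_got_result) {
		/* a real reply won the race; use its result */
		err = le32_to_cpu(req->r_reply_info.head->result);
	} else if (err < 0) {
		mutex_lock(&req->r_fill_mutex);	/* fence ceph_fill_trace() */
		req->r_err = err;
		req->r_aborted = true;
		mutex_unlock(&req->r_fill_mutex);
	}
	mutex_unlock(&mdsc->mutex);
```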
| @@ -1797,6 +1879,12 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) | |||
| 1797 | mutex_unlock(&mdsc->mutex); | 1879 | mutex_unlock(&mdsc->mutex); |
| 1798 | goto out; | 1880 | goto out; |
| 1799 | } | 1881 | } |
| 1882 | if (req->r_got_safe && !head->safe) { | ||
| 1883 | pr_warning("got unsafe after safe on %llu from mds%d\n", | ||
| 1884 | tid, mds); | ||
| 1885 | mutex_unlock(&mdsc->mutex); | ||
| 1886 | goto out; | ||
| 1887 | } | ||
| 1800 | 1888 | ||
| 1801 | result = le32_to_cpu(head->result); | 1889 | result = le32_to_cpu(head->result); |
| 1802 | 1890 | ||
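The new guard exists because MDS replies are two-phase: an early "unsafe" ack may be followed by a "safe" one once the operation is journaled. After an MDS restart, a replayed unsafe reply can arrive after the safe one has already been processed, so it is dropped rather than reprocessed:

```c
	/* out-of-order replay after MDS recovery: the operation is
	 * already committed, so the stale unsafe ack is ignored */
	if (req->r_got_safe && !head->safe) {
		pr_warning("got unsafe after safe on %llu from mds%d\n",
			   tid, mds);
		goto out;
	}
```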
| @@ -1838,11 +1926,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) | |||
| 1838 | mutex_unlock(&mdsc->mutex); | 1926 | mutex_unlock(&mdsc->mutex); |
| 1839 | goto out; | 1927 | goto out; |
| 1840 | } | 1928 | } |
| 1841 | } | 1929 | } else { |
| 1842 | |||
| 1843 | BUG_ON(req->r_reply); | ||
| 1844 | |||
| 1845 | if (!head->safe) { | ||
| 1846 | req->r_got_unsafe = true; | 1930 | req->r_got_unsafe = true; |
| 1847 | list_add_tail(&req->r_unsafe_item, &req->r_session->s_unsafe); | 1931 | list_add_tail(&req->r_unsafe_item, &req->r_session->s_unsafe); |
| 1848 | } | 1932 | } |
| @@ -1871,21 +1955,30 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) | |||
| 1871 | } | 1955 | } |
| 1872 | 1956 | ||
| 1873 | /* insert trace into our cache */ | 1957 | /* insert trace into our cache */ |
| 1958 | mutex_lock(&req->r_fill_mutex); | ||
| 1874 | err = ceph_fill_trace(mdsc->client->sb, req, req->r_session); | 1959 | err = ceph_fill_trace(mdsc->client->sb, req, req->r_session); |
| 1875 | if (err == 0) { | 1960 | if (err == 0) { |
| 1876 | if (result == 0 && rinfo->dir_nr) | 1961 | if (result == 0 && rinfo->dir_nr) |
| 1877 | ceph_readdir_prepopulate(req, req->r_session); | 1962 | ceph_readdir_prepopulate(req, req->r_session); |
| 1878 | ceph_unreserve_caps(&req->r_caps_reservation); | 1963 | ceph_unreserve_caps(&req->r_caps_reservation); |
| 1879 | } | 1964 | } |
| 1965 | mutex_unlock(&req->r_fill_mutex); | ||
| 1880 | 1966 | ||
| 1881 | up_read(&mdsc->snap_rwsem); | 1967 | up_read(&mdsc->snap_rwsem); |
| 1882 | out_err: | 1968 | out_err: |
| 1883 | if (err) { | 1969 | mutex_lock(&mdsc->mutex); |
| 1884 | req->r_err = err; | 1970 | if (!req->r_aborted) { |
| 1971 | if (err) { | ||
| 1972 | req->r_err = err; | ||
| 1973 | } else { | ||
| 1974 | req->r_reply = msg; | ||
| 1975 | ceph_msg_get(msg); | ||
| 1976 | req->r_got_result = true; | ||
| 1977 | } | ||
| 1885 | } else { | 1978 | } else { |
| 1886 | req->r_reply = msg; | 1979 | dout("reply arrived after request %lld was aborted\n", tid); |
| 1887 | ceph_msg_get(msg); | ||
| 1888 | } | 1980 | } |
| 1981 | mutex_unlock(&mdsc->mutex); | ||
| 1889 | 1982 | ||
| 1890 | add_cap_releases(mdsc, req->r_session, -1); | 1983 | add_cap_releases(mdsc, req->r_session, -1); |
| 1891 | mutex_unlock(&session->s_mutex); | 1984 | mutex_unlock(&session->s_mutex); |
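This is the reply side of the abort race: the result (or error) is published under `mdsc->mutex`, and only if the waiter has not already flagged the request aborted; otherwise the late reply is logged and discarded. In miniature:

```c
	mutex_lock(&mdsc->mutex);
	if (!req->r_aborted) {
		req->r_reply = msg;
		ceph_msg_get(msg);
		req->r_got_result = true;	/* waiter will use this */
	} else {
		dout("reply arrived after request %lld was aborted\n", tid);
	}
	mutex_unlock(&mdsc->mutex);
```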
| @@ -1984,6 +2077,8 @@ static void handle_session(struct ceph_mds_session *session, | |||
| 1984 | 2077 | ||
| 1985 | switch (op) { | 2078 | switch (op) { |
| 1986 | case CEPH_SESSION_OPEN: | 2079 | case CEPH_SESSION_OPEN: |
| 2080 | if (session->s_state == CEPH_MDS_SESSION_RECONNECTING) | ||
| 2081 | pr_info("mds%d reconnect success\n", session->s_mds); | ||
| 1987 | session->s_state = CEPH_MDS_SESSION_OPEN; | 2082 | session->s_state = CEPH_MDS_SESSION_OPEN; |
| 1988 | renewed_caps(mdsc, session, 0); | 2083 | renewed_caps(mdsc, session, 0); |
| 1989 | wake = 1; | 2084 | wake = 1; |
| @@ -1997,10 +2092,12 @@ static void handle_session(struct ceph_mds_session *session, | |||
| 1997 | break; | 2092 | break; |
| 1998 | 2093 | ||
| 1999 | case CEPH_SESSION_CLOSE: | 2094 | case CEPH_SESSION_CLOSE: |
| 2095 | if (session->s_state == CEPH_MDS_SESSION_RECONNECTING) | ||
| 2096 | pr_info("mds%d reconnect denied\n", session->s_mds); | ||
| 2000 | remove_session_caps(session); | 2097 | remove_session_caps(session); |
| 2001 | wake = 1; /* for good measure */ | 2098 | wake = 1; /* for good measure */ |
| 2002 | complete(&mdsc->session_close_waiters); | 2099 | complete(&mdsc->session_close_waiters); |
| 2003 | kick_requests(mdsc, mds, 0); /* cur only */ | 2100 | kick_requests(mdsc, mds); |
| 2004 | break; | 2101 | break; |
| 2005 | 2102 | ||
| 2006 | case CEPH_SESSION_STALE: | 2103 | case CEPH_SESSION_STALE: |
| @@ -2132,54 +2229,44 @@ out: | |||
| 2132 | * | 2229 | * |
| 2133 | * called with mdsc->mutex held. | 2230 | * called with mdsc->mutex held. |
| 2134 | */ | 2231 | */ |
| 2135 | static void send_mds_reconnect(struct ceph_mds_client *mdsc, int mds) | 2232 | static void send_mds_reconnect(struct ceph_mds_client *mdsc, |
| 2233 | struct ceph_mds_session *session) | ||
| 2136 | { | 2234 | { |
| 2137 | struct ceph_mds_session *session = NULL; | ||
| 2138 | struct ceph_msg *reply; | 2235 | struct ceph_msg *reply; |
| 2139 | struct rb_node *p; | 2236 | struct rb_node *p; |
| 2237 | int mds = session->s_mds; | ||
| 2140 | int err = -ENOMEM; | 2238 | int err = -ENOMEM; |
| 2141 | struct ceph_pagelist *pagelist; | 2239 | struct ceph_pagelist *pagelist; |
| 2142 | 2240 | ||
| 2143 | pr_info("reconnect to recovering mds%d\n", mds); | 2241 | pr_info("mds%d reconnect start\n", mds); |
| 2144 | 2242 | ||
| 2145 | pagelist = kmalloc(sizeof(*pagelist), GFP_NOFS); | 2243 | pagelist = kmalloc(sizeof(*pagelist), GFP_NOFS); |
| 2146 | if (!pagelist) | 2244 | if (!pagelist) |
| 2147 | goto fail_nopagelist; | 2245 | goto fail_nopagelist; |
| 2148 | ceph_pagelist_init(pagelist); | 2246 | ceph_pagelist_init(pagelist); |
| 2149 | 2247 | ||
| 2150 | reply = ceph_msg_new(CEPH_MSG_CLIENT_RECONNECT, 0, 0, 0, NULL); | 2248 | reply = ceph_msg_new(CEPH_MSG_CLIENT_RECONNECT, 0, GFP_NOFS); |
| 2151 | if (IS_ERR(reply)) { | 2249 | if (!reply) |
| 2152 | err = PTR_ERR(reply); | ||
| 2153 | goto fail_nomsg; | 2250 | goto fail_nomsg; |
| 2154 | } | ||
| 2155 | |||
| 2156 | /* find session */ | ||
| 2157 | session = __ceph_lookup_mds_session(mdsc, mds); | ||
| 2158 | mutex_unlock(&mdsc->mutex); /* drop lock for duration */ | ||
| 2159 | 2251 | ||
| 2160 | if (session) { | 2252 | mutex_lock(&session->s_mutex); |
| 2161 | mutex_lock(&session->s_mutex); | 2253 | session->s_state = CEPH_MDS_SESSION_RECONNECTING; |
| 2254 | session->s_seq = 0; | ||
| 2162 | 2255 | ||
| 2163 | session->s_state = CEPH_MDS_SESSION_RECONNECTING; | 2256 | ceph_con_open(&session->s_con, |
| 2164 | session->s_seq = 0; | 2257 | ceph_mdsmap_get_addr(mdsc->mdsmap, mds)); |
| 2165 | 2258 | ||
| 2166 | ceph_con_open(&session->s_con, | 2259 | /* replay unsafe requests */ |
| 2167 | ceph_mdsmap_get_addr(mdsc->mdsmap, mds)); | 2260 | replay_unsafe_requests(mdsc, session); |
| 2168 | |||
| 2169 | /* replay unsafe requests */ | ||
| 2170 | replay_unsafe_requests(mdsc, session); | ||
| 2171 | } else { | ||
| 2172 | dout("no session for mds%d, will send short reconnect\n", | ||
| 2173 | mds); | ||
| 2174 | } | ||
| 2175 | 2261 | ||
| 2176 | down_read(&mdsc->snap_rwsem); | 2262 | down_read(&mdsc->snap_rwsem); |
| 2177 | 2263 | ||
| 2178 | if (!session) | ||
| 2179 | goto send; | ||
| 2180 | dout("session %p state %s\n", session, | 2264 | dout("session %p state %s\n", session, |
| 2181 | session_state_name(session->s_state)); | 2265 | session_state_name(session->s_state)); |
| 2182 | 2266 | ||
| 2267 | /* drop old cap expires; we're about to reestablish that state */ | ||
| 2268 | discard_cap_releases(mdsc, session); | ||
| 2269 | |||
| 2183 | /* traverse this session's caps */ | 2270 | /* traverse this session's caps */ |
| 2184 | err = ceph_pagelist_encode_32(pagelist, session->s_nr_caps); | 2271 | err = ceph_pagelist_encode_32(pagelist, session->s_nr_caps); |
| 2185 | if (err) | 2272 | if (err) |
| @@ -2208,36 +2295,29 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, int mds) | |||
| 2208 | goto fail; | 2295 | goto fail; |
| 2209 | } | 2296 | } |
| 2210 | 2297 | ||
| 2211 | send: | ||
| 2212 | reply->pagelist = pagelist; | 2298 | reply->pagelist = pagelist; |
| 2213 | reply->hdr.data_len = cpu_to_le32(pagelist->length); | 2299 | reply->hdr.data_len = cpu_to_le32(pagelist->length); |
| 2214 | reply->nr_pages = calc_pages_for(0, pagelist->length); | 2300 | reply->nr_pages = calc_pages_for(0, pagelist->length); |
| 2215 | ceph_con_send(&session->s_con, reply); | 2301 | ceph_con_send(&session->s_con, reply); |
| 2216 | 2302 | ||
| 2217 | session->s_state = CEPH_MDS_SESSION_OPEN; | ||
| 2218 | mutex_unlock(&session->s_mutex); | 2303 | mutex_unlock(&session->s_mutex); |
| 2219 | 2304 | ||
| 2220 | mutex_lock(&mdsc->mutex); | 2305 | mutex_lock(&mdsc->mutex); |
| 2221 | __wake_requests(mdsc, &session->s_waiting); | 2306 | __wake_requests(mdsc, &session->s_waiting); |
| 2222 | mutex_unlock(&mdsc->mutex); | 2307 | mutex_unlock(&mdsc->mutex); |
| 2223 | 2308 | ||
| 2224 | ceph_put_mds_session(session); | ||
| 2225 | |||
| 2226 | up_read(&mdsc->snap_rwsem); | 2309 | up_read(&mdsc->snap_rwsem); |
| 2227 | mutex_lock(&mdsc->mutex); | ||
| 2228 | return; | 2310 | return; |
| 2229 | 2311 | ||
| 2230 | fail: | 2312 | fail: |
| 2231 | ceph_msg_put(reply); | 2313 | ceph_msg_put(reply); |
| 2232 | up_read(&mdsc->snap_rwsem); | 2314 | up_read(&mdsc->snap_rwsem); |
| 2233 | mutex_unlock(&session->s_mutex); | 2315 | mutex_unlock(&session->s_mutex); |
| 2234 | ceph_put_mds_session(session); | ||
| 2235 | fail_nomsg: | 2316 | fail_nomsg: |
| 2236 | ceph_pagelist_release(pagelist); | 2317 | ceph_pagelist_release(pagelist); |
| 2237 | kfree(pagelist); | 2318 | kfree(pagelist); |
| 2238 | fail_nopagelist: | 2319 | fail_nopagelist: |
| 2239 | pr_err("error %d preparing reconnect for mds%d\n", err, mds); | 2320 | pr_err("error %d preparing reconnect for mds%d\n", err, mds); |
| 2240 | mutex_lock(&mdsc->mutex); | ||
| 2241 | return; | 2321 | return; |
| 2242 | } | 2322 | } |
| 2243 | 2323 | ||
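`send_mds_reconnect()` now receives the session from its caller, which already holds a reference, instead of looking it up by rank and juggling `mdsc->mutex` itself; the "short reconnect" fallback for a missing session disappears with it. The locking consequence lands in the `check_new_map()` hunk that follows: the caller drops `mdsc->mutex` for the duration, since reconnect sleeps on the session mutex and the messenger:

```c
		mutex_unlock(&mdsc->mutex);
		send_mds_reconnect(mdsc, s);	/* sleeps; takes s_mutex */
		mutex_lock(&mdsc->mutex);
```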
| @@ -2290,7 +2370,7 @@ static void check_new_map(struct ceph_mds_client *mdsc, | |||
| 2290 | } | 2370 | } |
| 2291 | 2371 | ||
| 2292 | /* kick any requests waiting on the recovering mds */ | 2372 | /* kick any requests waiting on the recovering mds */ |
| 2293 | kick_requests(mdsc, i, 1); | 2373 | kick_requests(mdsc, i); |
| 2294 | } else if (oldstate == newstate) { | 2374 | } else if (oldstate == newstate) { |
| 2295 | continue; /* nothing new with this mds */ | 2375 | continue; /* nothing new with this mds */ |
| 2296 | } | 2376 | } |
| @@ -2299,22 +2379,21 @@ static void check_new_map(struct ceph_mds_client *mdsc, | |||
| 2299 | * send reconnect? | 2379 | * send reconnect? |
| 2300 | */ | 2380 | */ |
| 2301 | if (s->s_state == CEPH_MDS_SESSION_RESTARTING && | 2381 | if (s->s_state == CEPH_MDS_SESSION_RESTARTING && |
| 2302 | newstate >= CEPH_MDS_STATE_RECONNECT) | 2382 | newstate >= CEPH_MDS_STATE_RECONNECT) { |
| 2303 | send_mds_reconnect(mdsc, i); | 2383 | mutex_unlock(&mdsc->mutex); |
| 2384 | send_mds_reconnect(mdsc, s); | ||
| 2385 | mutex_lock(&mdsc->mutex); | ||
| 2386 | } | ||
| 2304 | 2387 | ||
| 2305 | /* | 2388 | /* |
| 2306 | * kick requests on any mds that has gone active. | 2389 | * kick request on any mds that has gone active. |
| 2306 | * kick requests on any mds that has gone active. | 2389 | * kick requests on any mds that has gone active. |
| 2307 | * | ||
| 2308 | * kick requests on cur or forwarder: we may have sent | ||
| 2309 | * the request to mds1, mds1 told us it forwarded it | ||
| 2310 | * to mds2, but then we learn mds1 failed and can't be | ||
| 2311 | * sure it successfully forwarded our request before | ||
| 2312 | * it died. | ||
| 2313 | */ | 2390 | */ |
| 2314 | if (oldstate < CEPH_MDS_STATE_ACTIVE && | 2391 | if (oldstate < CEPH_MDS_STATE_ACTIVE && |
| 2315 | newstate >= CEPH_MDS_STATE_ACTIVE) { | 2392 | newstate >= CEPH_MDS_STATE_ACTIVE) { |
| 2316 | pr_info("mds%d reconnect completed\n", s->s_mds); | 2393 | if (oldstate != CEPH_MDS_STATE_CREATING && |
| 2317 | kick_requests(mdsc, i, 1); | 2394 | oldstate != CEPH_MDS_STATE_STARTING) |
| 2395 | pr_info("mds%d recovery completed\n", s->s_mds); | ||
| 2396 | kick_requests(mdsc, i); | ||
| 2318 | ceph_kick_flushing_caps(mdsc, s); | 2397 | ceph_kick_flushing_caps(mdsc, s); |
| 2319 | wake_up_session_caps(s, 1); | 2398 | wake_up_session_caps(s, 1); |
| 2320 | } | 2399 | } |
| @@ -2457,8 +2536,8 @@ void ceph_mdsc_lease_send_msg(struct ceph_mds_session *session, | |||
| 2457 | dnamelen = dentry->d_name.len; | 2536 | dnamelen = dentry->d_name.len; |
| 2458 | len += dnamelen; | 2537 | len += dnamelen; |
| 2459 | 2538 | ||
| 2460 | msg = ceph_msg_new(CEPH_MSG_CLIENT_LEASE, len, 0, 0, NULL); | 2539 | msg = ceph_msg_new(CEPH_MSG_CLIENT_LEASE, len, GFP_NOFS); |
| 2461 | if (IS_ERR(msg)) | 2540 | if (!msg) |
| 2462 | return; | 2541 | return; |
| 2463 | lease = msg->front.iov_base; | 2542 | lease = msg->front.iov_base; |
| 2464 | lease->action = action; | 2543 | lease->action = action; |
| @@ -2603,7 +2682,9 @@ static void delayed_work(struct work_struct *work) | |||
| 2603 | else | 2682 | else |
| 2604 | ceph_con_keepalive(&s->s_con); | 2683 | ceph_con_keepalive(&s->s_con); |
| 2605 | add_cap_releases(mdsc, s, -1); | 2684 | add_cap_releases(mdsc, s, -1); |
| 2606 | send_cap_releases(mdsc, s); | 2685 | if (s->s_state == CEPH_MDS_SESSION_OPEN || |
| 2686 | s->s_state == CEPH_MDS_SESSION_HUNG) | ||
| 2687 | send_cap_releases(mdsc, s); | ||
| 2607 | mutex_unlock(&s->s_mutex); | 2688 | mutex_unlock(&s->s_mutex); |
| 2608 | ceph_put_mds_session(s); | 2689 | ceph_put_mds_session(s); |
| 2609 | 2690 | ||
| @@ -2620,6 +2701,9 @@ int ceph_mdsc_init(struct ceph_mds_client *mdsc, struct ceph_client *client) | |||
| 2620 | mdsc->client = client; | 2701 | mdsc->client = client; |
| 2621 | mutex_init(&mdsc->mutex); | 2702 | mutex_init(&mdsc->mutex); |
| 2622 | mdsc->mdsmap = kzalloc(sizeof(*mdsc->mdsmap), GFP_NOFS); | 2703 | mdsc->mdsmap = kzalloc(sizeof(*mdsc->mdsmap), GFP_NOFS); |
| 2704 | if (mdsc->mdsmap == NULL) | ||
| 2705 | return -ENOMEM; | ||
| 2706 | |||
| 2623 | init_completion(&mdsc->safe_umount_waiters); | 2707 | init_completion(&mdsc->safe_umount_waiters); |
| 2624 | init_completion(&mdsc->session_close_waiters); | 2708 | init_completion(&mdsc->session_close_waiters); |
| 2625 | INIT_LIST_HEAD(&mdsc->waiting_for_map); | 2709 | INIT_LIST_HEAD(&mdsc->waiting_for_map); |
| @@ -2645,6 +2729,7 @@ int ceph_mdsc_init(struct ceph_mds_client *mdsc, struct ceph_client *client) | |||
| 2645 | init_waitqueue_head(&mdsc->cap_flushing_wq); | 2729 | init_waitqueue_head(&mdsc->cap_flushing_wq); |
| 2646 | spin_lock_init(&mdsc->dentry_lru_lock); | 2730 | spin_lock_init(&mdsc->dentry_lru_lock); |
| 2647 | INIT_LIST_HEAD(&mdsc->dentry_lru); | 2731 | INIT_LIST_HEAD(&mdsc->dentry_lru); |
| 2732 | |||
| 2648 | return 0; | 2733 | return 0; |
| 2649 | } | 2734 | } |
| 2650 | 2735 | ||
| @@ -2740,6 +2825,9 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc) | |||
| 2740 | { | 2825 | { |
| 2741 | u64 want_tid, want_flush; | 2826 | u64 want_tid, want_flush; |
| 2742 | 2827 | ||
| 2828 | if (mdsc->client->mount_state == CEPH_MOUNT_SHUTDOWN) | ||
| 2829 | return; | ||
| 2830 | |||
| 2743 | dout("sync\n"); | 2831 | dout("sync\n"); |
| 2744 | mutex_lock(&mdsc->mutex); | 2832 | mutex_lock(&mdsc->mutex); |
| 2745 | want_tid = mdsc->last_tid; | 2833 | want_tid = mdsc->last_tid; |
| @@ -2922,9 +3010,10 @@ static void con_put(struct ceph_connection *con) | |||
| 2922 | static void peer_reset(struct ceph_connection *con) | 3010 | static void peer_reset(struct ceph_connection *con) |
| 2923 | { | 3011 | { |
| 2924 | struct ceph_mds_session *s = con->private; | 3012 | struct ceph_mds_session *s = con->private; |
| 3013 | struct ceph_mds_client *mdsc = s->s_mdsc; | ||
| 2925 | 3014 | ||
| 2926 | pr_err("mds%d gave us the boot. IMPLEMENT RECONNECT.\n", | 3015 | pr_warning("mds%d closed our session\n", s->s_mds); |
| 2927 | s->s_mds); | 3016 | send_mds_reconnect(mdsc, s); |
| 2928 | } | 3017 | } |
| 2929 | 3018 | ||
| 2930 | static void dispatch(struct ceph_connection *con, struct ceph_msg *msg) | 3019 | static void dispatch(struct ceph_connection *con, struct ceph_msg *msg) |
| @@ -3031,7 +3120,7 @@ static int invalidate_authorizer(struct ceph_connection *con) | |||
| 3031 | return ceph_monc_validate_auth(&mdsc->client->monc); | 3120 | return ceph_monc_validate_auth(&mdsc->client->monc); |
| 3032 | } | 3121 | } |
| 3033 | 3122 | ||
| 3034 | const static struct ceph_connection_operations mds_con_ops = { | 3123 | static const struct ceph_connection_operations mds_con_ops = { |
| 3035 | .get = con_get, | 3124 | .get = con_get, |
| 3036 | .put = con_put, | 3125 | .put = con_put, |
| 3037 | .dispatch = dispatch, | 3126 | .dispatch = dispatch, |
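The `const static` → `static const` swaps here and at the top of the file are pure declaration hygiene: C permits a storage-class specifier after other specifiers but deprecates it, and gcc warns ("'static' is not at beginning of declaration"). Before and after:

```c
const static struct ceph_connection_operations mds_con_ops;	/* warns */
static const struct ceph_connection_operations mds_con_ops;	/* clean */
```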
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 961cc6f65878..d9936c4f1212 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h | |||
| @@ -165,6 +165,8 @@ struct ceph_mds_request { | |||
| 165 | struct inode *r_locked_dir; /* dir (if any) i_mutex locked by vfs */ | 165 | struct inode *r_locked_dir; /* dir (if any) i_mutex locked by vfs */ |
| 166 | struct inode *r_target_inode; /* resulting inode */ | 166 | struct inode *r_target_inode; /* resulting inode */ |
| 167 | 167 | ||
| 168 | struct mutex r_fill_mutex; | ||
| 169 | |||
| 168 | union ceph_mds_request_args r_args; | 170 | union ceph_mds_request_args r_args; |
| 169 | int r_fmode; /* file mode, if expecting cap */ | 171 | int r_fmode; /* file mode, if expecting cap */ |
| 170 | 172 | ||
| @@ -213,7 +215,7 @@ struct ceph_mds_request { | |||
| 213 | struct completion r_safe_completion; | 215 | struct completion r_safe_completion; |
| 214 | ceph_mds_request_callback_t r_callback; | 216 | ceph_mds_request_callback_t r_callback; |
| 215 | struct list_head r_unsafe_item; /* per-session unsafe list item */ | 217 | struct list_head r_unsafe_item; /* per-session unsafe list item */ |
| 216 | bool r_got_unsafe, r_got_safe; | 218 | bool r_got_unsafe, r_got_safe, r_got_result; |
| 217 | 219 | ||
| 218 | bool r_did_prepopulate; | 220 | bool r_did_prepopulate; |
| 219 | u32 r_readdir_offset; | 221 | u32 r_readdir_offset; |
| @@ -301,6 +303,8 @@ extern void ceph_mdsc_lease_release(struct ceph_mds_client *mdsc, | |||
| 301 | struct inode *inode, | 303 | struct inode *inode, |
| 302 | struct dentry *dn, int mask); | 304 | struct dentry *dn, int mask); |
| 303 | 305 | ||
| 306 | extern void ceph_invalidate_dir_request(struct ceph_mds_request *req); | ||
| 307 | |||
| 304 | extern struct ceph_mds_request * | 308 | extern struct ceph_mds_request * |
| 305 | ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode); | 309 | ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode); |
| 306 | extern void ceph_mdsc_submit_request(struct ceph_mds_client *mdsc, | 310 | extern void ceph_mdsc_submit_request(struct ceph_mds_client *mdsc, |
diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c index cd4fadb6491a..60b74839ebec 100644 --- a/fs/ceph/messenger.c +++ b/fs/ceph/messenger.c | |||
| @@ -39,18 +39,6 @@ static void queue_con(struct ceph_connection *con); | |||
| 39 | static void con_work(struct work_struct *); | 39 | static void con_work(struct work_struct *); |
| 40 | static void ceph_fault(struct ceph_connection *con); | 40 | static void ceph_fault(struct ceph_connection *con); |
| 41 | 41 | ||
| 42 | const char *ceph_name_type_str(int t) | ||
| 43 | { | ||
| 44 | switch (t) { | ||
| 45 | case CEPH_ENTITY_TYPE_MON: return "mon"; | ||
| 46 | case CEPH_ENTITY_TYPE_MDS: return "mds"; | ||
| 47 | case CEPH_ENTITY_TYPE_OSD: return "osd"; | ||
| 48 | case CEPH_ENTITY_TYPE_CLIENT: return "client"; | ||
| 49 | case CEPH_ENTITY_TYPE_ADMIN: return "admin"; | ||
| 50 | default: return "???"; | ||
| 51 | } | ||
| 52 | } | ||
| 53 | |||
| 54 | /* | 42 | /* |
| 55 | * nicely render a sockaddr as a string. | 43 | * nicely render a sockaddr as a string. |
| 56 | */ | 44 | */ |
| @@ -340,6 +328,7 @@ static void reset_connection(struct ceph_connection *con) | |||
| 340 | ceph_msg_put(con->out_msg); | 328 | ceph_msg_put(con->out_msg); |
| 341 | con->out_msg = NULL; | 329 | con->out_msg = NULL; |
| 342 | } | 330 | } |
| 331 | con->out_keepalive_pending = false; | ||
| 343 | con->in_seq = 0; | 332 | con->in_seq = 0; |
| 344 | con->in_seq_acked = 0; | 333 | con->in_seq_acked = 0; |
| 345 | } | 334 | } |
| @@ -357,6 +346,7 @@ void ceph_con_close(struct ceph_connection *con) | |||
| 357 | clear_bit(WRITE_PENDING, &con->state); | 346 | clear_bit(WRITE_PENDING, &con->state); |
| 358 | mutex_lock(&con->mutex); | 347 | mutex_lock(&con->mutex); |
| 359 | reset_connection(con); | 348 | reset_connection(con); |
| 349 | con->peer_global_seq = 0; | ||
| 360 | cancel_delayed_work(&con->work); | 350 | cancel_delayed_work(&con->work); |
| 361 | mutex_unlock(&con->mutex); | 351 | mutex_unlock(&con->mutex); |
| 362 | queue_con(con); | 352 | queue_con(con); |
| @@ -661,7 +651,7 @@ static void prepare_write_connect(struct ceph_messenger *msgr, | |||
| 661 | dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con, | 651 | dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con, |
| 662 | con->connect_seq, global_seq, proto); | 652 | con->connect_seq, global_seq, proto); |
| 663 | 653 | ||
| 664 | con->out_connect.features = CEPH_FEATURE_SUPPORTED; | 654 | con->out_connect.features = CEPH_FEATURE_SUPPORTED_CLIENT; |
| 665 | con->out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT); | 655 | con->out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT); |
| 666 | con->out_connect.connect_seq = cpu_to_le32(con->connect_seq); | 656 | con->out_connect.connect_seq = cpu_to_le32(con->connect_seq); |
| 667 | con->out_connect.global_seq = cpu_to_le32(global_seq); | 657 | con->out_connect.global_seq = cpu_to_le32(global_seq); |
| @@ -1124,8 +1114,8 @@ static void fail_protocol(struct ceph_connection *con) | |||
| 1124 | 1114 | ||
| 1125 | static int process_connect(struct ceph_connection *con) | 1115 | static int process_connect(struct ceph_connection *con) |
| 1126 | { | 1116 | { |
| 1127 | u64 sup_feat = CEPH_FEATURE_SUPPORTED; | 1117 | u64 sup_feat = CEPH_FEATURE_SUPPORTED_CLIENT; |
| 1128 | u64 req_feat = CEPH_FEATURE_REQUIRED; | 1118 | u64 req_feat = CEPH_FEATURE_REQUIRED_CLIENT; |
| 1129 | u64 server_feat = le64_to_cpu(con->in_reply.features); | 1119 | u64 server_feat = le64_to_cpu(con->in_reply.features); |
| 1130 | 1120 | ||
| 1131 | dout("process_connect on %p tag %d\n", con, (int)con->in_tag); | 1121 | dout("process_connect on %p tag %d\n", con, (int)con->in_tag); |
| @@ -1233,6 +1223,7 @@ static int process_connect(struct ceph_connection *con) | |||
| 1233 | clear_bit(CONNECTING, &con->state); | 1223 | clear_bit(CONNECTING, &con->state); |
| 1234 | con->peer_global_seq = le32_to_cpu(con->in_reply.global_seq); | 1224 | con->peer_global_seq = le32_to_cpu(con->in_reply.global_seq); |
| 1235 | con->connect_seq++; | 1225 | con->connect_seq++; |
| 1226 | con->peer_features = server_feat; | ||
| 1236 | dout("process_connect got READY gseq %d cseq %d (%d)\n", | 1227 | dout("process_connect got READY gseq %d cseq %d (%d)\n", |
| 1237 | con->peer_global_seq, | 1228 | con->peer_global_seq, |
| 1238 | le32_to_cpu(con->in_reply.connect_seq), | 1229 | le32_to_cpu(con->in_reply.connect_seq), |
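The messenger now advertises and checks client-specific feature masks (`CEPH_FEATURE_SUPPORTED_CLIENT` / `CEPH_FEATURE_REQUIRED_CLIENT`) rather than a shared set, and remembers the server's bits in the new `con->peer_features` once the connection is READY. The heart of the handshake check is a bitmask subtraction; a userspace sketch with made-up bit values (the real masks live in ceph_fs.h):

```c
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t req_feat    = 0x05;	/* bits we require of the server */
	uint64_t server_feat = 0x0c;	/* from the connect reply */

	uint64_t missing = req_feat & ~server_feat;
	if (missing)			/* prints: missing ... 1 */
		printf("missing required features %llx\n",
		       (unsigned long long)missing);
	return 0;
}
```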
| @@ -1402,19 +1393,17 @@ static int read_partial_message(struct ceph_connection *con) | |||
| 1402 | con->in_msg = ceph_alloc_msg(con, &con->in_hdr, &skip); | 1393 | con->in_msg = ceph_alloc_msg(con, &con->in_hdr, &skip); |
| 1403 | if (skip) { | 1394 | if (skip) { |
| 1404 | /* skip this message */ | 1395 | /* skip this message */ |
| 1405 | dout("alloc_msg returned NULL, skipping message\n"); | 1396 | dout("alloc_msg said skip message\n"); |
| 1406 | con->in_base_pos = -front_len - middle_len - data_len - | 1397 | con->in_base_pos = -front_len - middle_len - data_len - |
| 1407 | sizeof(m->footer); | 1398 | sizeof(m->footer); |
| 1408 | con->in_tag = CEPH_MSGR_TAG_READY; | 1399 | con->in_tag = CEPH_MSGR_TAG_READY; |
| 1409 | con->in_seq++; | 1400 | con->in_seq++; |
| 1410 | return 0; | 1401 | return 0; |
| 1411 | } | 1402 | } |
| 1412 | if (IS_ERR(con->in_msg)) { | 1403 | if (!con->in_msg) { |
| 1413 | ret = PTR_ERR(con->in_msg); | ||
| 1414 | con->in_msg = NULL; | ||
| 1415 | con->error_msg = | 1404 | con->error_msg = |
| 1416 | "error allocating memory for incoming message"; | 1405 | "error allocating memory for incoming message"; |
| 1417 | return ret; | 1406 | return -ENOMEM; |
| 1418 | } | 1407 | } |
| 1419 | m = con->in_msg; | 1408 | m = con->in_msg; |
| 1420 | m->front.iov_len = 0; /* haven't read it yet */ | 1409 | m->front.iov_len = 0; /* haven't read it yet */ |
| @@ -1514,14 +1503,14 @@ static void process_message(struct ceph_connection *con) | |||
| 1514 | 1503 | ||
| 1515 | /* if first message, set peer_name */ | 1504 | /* if first message, set peer_name */ |
| 1516 | if (con->peer_name.type == 0) | 1505 | if (con->peer_name.type == 0) |
| 1517 | con->peer_name = msg->hdr.src.name; | 1506 | con->peer_name = msg->hdr.src; |
| 1518 | 1507 | ||
| 1519 | con->in_seq++; | 1508 | con->in_seq++; |
| 1520 | mutex_unlock(&con->mutex); | 1509 | mutex_unlock(&con->mutex); |
| 1521 | 1510 | ||
| 1522 | dout("===== %p %llu from %s%lld %d=%s len %d+%d (%u %u %u) =====\n", | 1511 | dout("===== %p %llu from %s%lld %d=%s len %d+%d (%u %u %u) =====\n", |
| 1523 | msg, le64_to_cpu(msg->hdr.seq), | 1512 | msg, le64_to_cpu(msg->hdr.seq), |
| 1524 | ENTITY_NAME(msg->hdr.src.name), | 1513 | ENTITY_NAME(msg->hdr.src), |
| 1525 | le16_to_cpu(msg->hdr.type), | 1514 | le16_to_cpu(msg->hdr.type), |
| 1526 | ceph_msg_type_name(le16_to_cpu(msg->hdr.type)), | 1515 | ceph_msg_type_name(le16_to_cpu(msg->hdr.type)), |
| 1527 | le32_to_cpu(msg->hdr.front_len), | 1516 | le32_to_cpu(msg->hdr.front_len), |
| @@ -1546,7 +1535,6 @@ static int try_write(struct ceph_connection *con) | |||
| 1546 | dout("try_write start %p state %lu nref %d\n", con, con->state, | 1535 | dout("try_write start %p state %lu nref %d\n", con, con->state, |
| 1547 | atomic_read(&con->nref)); | 1536 | atomic_read(&con->nref)); |
| 1548 | 1537 | ||
| 1549 | mutex_lock(&con->mutex); | ||
| 1550 | more: | 1538 | more: |
| 1551 | dout("try_write out_kvec_bytes %d\n", con->out_kvec_bytes); | 1539 | dout("try_write out_kvec_bytes %d\n", con->out_kvec_bytes); |
| 1552 | 1540 | ||
| @@ -1639,7 +1627,6 @@ do_next: | |||
| 1639 | done: | 1627 | done: |
| 1640 | ret = 0; | 1628 | ret = 0; |
| 1641 | out: | 1629 | out: |
| 1642 | mutex_unlock(&con->mutex); | ||
| 1643 | dout("try_write done on %p\n", con); | 1630 | dout("try_write done on %p\n", con); |
| 1644 | return ret; | 1631 | return ret; |
| 1645 | } | 1632 | } |
| @@ -1651,7 +1638,6 @@ out: | |||
| 1651 | */ | 1638 | */ |
| 1652 | static int try_read(struct ceph_connection *con) | 1639 | static int try_read(struct ceph_connection *con) |
| 1653 | { | 1640 | { |
| 1654 | struct ceph_messenger *msgr; | ||
| 1655 | int ret = -1; | 1641 | int ret = -1; |
| 1656 | 1642 | ||
| 1657 | if (!con->sock) | 1643 | if (!con->sock) |
| @@ -1661,9 +1647,6 @@ static int try_read(struct ceph_connection *con) | |||
| 1661 | return 0; | 1647 | return 0; |
| 1662 | 1648 | ||
| 1663 | dout("try_read start on %p\n", con); | 1649 | dout("try_read start on %p\n", con); |
| 1664 | msgr = con->msgr; | ||
| 1665 | |||
| 1666 | mutex_lock(&con->mutex); | ||
| 1667 | 1650 | ||
| 1668 | more: | 1651 | more: |
| 1669 | dout("try_read tag %d in_base_pos %d\n", (int)con->in_tag, | 1652 | dout("try_read tag %d in_base_pos %d\n", (int)con->in_tag, |
| @@ -1758,7 +1741,6 @@ more: | |||
| 1758 | done: | 1741 | done: |
| 1759 | ret = 0; | 1742 | ret = 0; |
| 1760 | out: | 1743 | out: |
| 1761 | mutex_unlock(&con->mutex); | ||
| 1762 | dout("try_read done on %p\n", con); | 1744 | dout("try_read done on %p\n", con); |
| 1763 | return ret; | 1745 | return ret; |
| 1764 | 1746 | ||
| @@ -1830,6 +1812,8 @@ more: | |||
| 1830 | dout("con_work %p start, clearing QUEUED\n", con); | 1812 | dout("con_work %p start, clearing QUEUED\n", con); |
| 1831 | clear_bit(QUEUED, &con->state); | 1813 | clear_bit(QUEUED, &con->state); |
| 1832 | 1814 | ||
| 1815 | mutex_lock(&con->mutex); | ||
| 1816 | |||
| 1833 | if (test_bit(CLOSED, &con->state)) { /* e.g. if we are replaced */ | 1817 | if (test_bit(CLOSED, &con->state)) { /* e.g. if we are replaced */ |
| 1834 | dout("con_work CLOSED\n"); | 1818 | dout("con_work CLOSED\n"); |
| 1835 | con_close_socket(con); | 1819 | con_close_socket(con); |
| @@ -1844,11 +1828,16 @@ more: | |||
| 1844 | if (test_and_clear_bit(SOCK_CLOSED, &con->state) || | 1828 | if (test_and_clear_bit(SOCK_CLOSED, &con->state) || |
| 1845 | try_read(con) < 0 || | 1829 | try_read(con) < 0 || |
| 1846 | try_write(con) < 0) { | 1830 | try_write(con) < 0) { |
| 1831 | mutex_unlock(&con->mutex); | ||
| 1847 | backoff = 1; | 1832 | backoff = 1; |
| 1848 | ceph_fault(con); /* error/fault path */ | 1833 | ceph_fault(con); /* error/fault path */ |
| 1834 | goto done_unlocked; | ||
| 1849 | } | 1835 | } |
| 1850 | 1836 | ||
| 1851 | done: | 1837 | done: |
| 1838 | mutex_unlock(&con->mutex); | ||
| 1839 | |||
| 1840 | done_unlocked: | ||
| 1852 | clear_bit(BUSY, &con->state); | 1841 | clear_bit(BUSY, &con->state); |
| 1853 | dout("con->state=%lu\n", con->state); | 1842 | dout("con->state=%lu\n", con->state); |
| 1854 | if (test_bit(QUEUED, &con->state)) { | 1843 | if (test_bit(QUEUED, &con->state)) { |
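The `con->mutex` handling is hoisted out of `try_read()` and `try_write()` into their single caller, `con_work()`, which now holds the mutex across both calls and releases it before entering the fault path (hence the new `done_unlocked` exit label). The resulting shape:

```c
	mutex_lock(&con->mutex);
	/* ... CLOSED/OPENING checks, each "goto done" on early exit ... */
	if (test_and_clear_bit(SOCK_CLOSED, &con->state) ||
	    try_read(con) < 0 ||
	    try_write(con) < 0) {
		mutex_unlock(&con->mutex);
		ceph_fault(con);	/* fault path must run unlocked */
		goto done_unlocked;
	}
done:
	mutex_unlock(&con->mutex);
done_unlocked:
	clear_bit(BUSY, &con->state);
```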
| @@ -1947,7 +1936,7 @@ struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr) | |||
| 1947 | 1936 | ||
| 1948 | /* the zero page is needed if a request is "canceled" while the message | 1937 | /* the zero page is needed if a request is "canceled" while the message |
| 1949 | * is being written over the socket */ | 1938 | * is being written over the socket */ |
| 1950 | msgr->zero_page = alloc_page(GFP_KERNEL | __GFP_ZERO); | 1939 | msgr->zero_page = __page_cache_alloc(GFP_KERNEL | __GFP_ZERO); |
| 1951 | if (!msgr->zero_page) { | 1940 | if (!msgr->zero_page) { |
| 1952 | kfree(msgr); | 1941 | kfree(msgr); |
| 1953 | return ERR_PTR(-ENOMEM); | 1942 | return ERR_PTR(-ENOMEM); |
| @@ -1987,9 +1976,7 @@ void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg) | |||
| 1987 | } | 1976 | } |
| 1988 | 1977 | ||
| 1989 | /* set src+dst */ | 1978 | /* set src+dst */ |
| 1990 | msg->hdr.src.name = con->msgr->inst.name; | 1979 | msg->hdr.src = con->msgr->inst.name; |
| 1991 | msg->hdr.src.addr = con->msgr->my_enc_addr; | ||
| 1992 | msg->hdr.orig_src = msg->hdr.src; | ||
| 1993 | 1980 | ||
| 1994 | BUG_ON(msg->front.iov_len != le32_to_cpu(msg->hdr.front_len)); | 1981 | BUG_ON(msg->front.iov_len != le32_to_cpu(msg->hdr.front_len)); |
| 1995 | 1982 | ||
| @@ -2083,12 +2070,11 @@ void ceph_con_keepalive(struct ceph_connection *con) | |||
| 2083 | * construct a new message with given type, size | 2070 | * construct a new message with given type, size |
| 2084 | * the new msg has a ref count of 1. | 2071 | * the new msg has a ref count of 1. |
| 2085 | */ | 2072 | */ |
| 2086 | struct ceph_msg *ceph_msg_new(int type, int front_len, | 2073 | struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags) |
| 2087 | int page_len, int page_off, struct page **pages) | ||
| 2088 | { | 2074 | { |
| 2089 | struct ceph_msg *m; | 2075 | struct ceph_msg *m; |
| 2090 | 2076 | ||
| 2091 | m = kmalloc(sizeof(*m), GFP_NOFS); | 2077 | m = kmalloc(sizeof(*m), flags); |
| 2092 | if (m == NULL) | 2078 | if (m == NULL) |
| 2093 | goto out; | 2079 | goto out; |
| 2094 | kref_init(&m->kref); | 2080 | kref_init(&m->kref); |
| @@ -2100,8 +2086,8 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, | |||
| 2100 | m->hdr.version = 0; | 2086 | m->hdr.version = 0; |
| 2101 | m->hdr.front_len = cpu_to_le32(front_len); | 2087 | m->hdr.front_len = cpu_to_le32(front_len); |
| 2102 | m->hdr.middle_len = 0; | 2088 | m->hdr.middle_len = 0; |
| 2103 | m->hdr.data_len = cpu_to_le32(page_len); | 2089 | m->hdr.data_len = 0; |
| 2104 | m->hdr.data_off = cpu_to_le16(page_off); | 2090 | m->hdr.data_off = 0; |
| 2105 | m->hdr.reserved = 0; | 2091 | m->hdr.reserved = 0; |
| 2106 | m->footer.front_crc = 0; | 2092 | m->footer.front_crc = 0; |
| 2107 | m->footer.middle_crc = 0; | 2093 | m->footer.middle_crc = 0; |
| @@ -2115,11 +2101,11 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, | |||
| 2115 | /* front */ | 2101 | /* front */ |
| 2116 | if (front_len) { | 2102 | if (front_len) { |
| 2117 | if (front_len > PAGE_CACHE_SIZE) { | 2103 | if (front_len > PAGE_CACHE_SIZE) { |
| 2118 | m->front.iov_base = __vmalloc(front_len, GFP_NOFS, | 2104 | m->front.iov_base = __vmalloc(front_len, flags, |
| 2119 | PAGE_KERNEL); | 2105 | PAGE_KERNEL); |
| 2120 | m->front_is_vmalloc = true; | 2106 | m->front_is_vmalloc = true; |
| 2121 | } else { | 2107 | } else { |
| 2122 | m->front.iov_base = kmalloc(front_len, GFP_NOFS); | 2108 | m->front.iov_base = kmalloc(front_len, flags); |
| 2123 | } | 2109 | } |
| 2124 | if (m->front.iov_base == NULL) { | 2110 | if (m->front.iov_base == NULL) { |
| 2125 | pr_err("msg_new can't allocate %d bytes\n", | 2111 | pr_err("msg_new can't allocate %d bytes\n", |
| @@ -2135,19 +2121,18 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, | |||
| 2135 | m->middle = NULL; | 2121 | m->middle = NULL; |
| 2136 | 2122 | ||
| 2137 | /* data */ | 2123 | /* data */ |
| 2138 | m->nr_pages = calc_pages_for(page_off, page_len); | 2124 | m->nr_pages = 0; |
| 2139 | m->pages = pages; | 2125 | m->pages = NULL; |
| 2140 | m->pagelist = NULL; | 2126 | m->pagelist = NULL; |
| 2141 | 2127 | ||
| 2142 | dout("ceph_msg_new %p page %d~%d -> %d\n", m, page_off, page_len, | 2128 | dout("ceph_msg_new %p front %d\n", m, front_len); |
| 2143 | m->nr_pages); | ||
| 2144 | return m; | 2129 | return m; |
| 2145 | 2130 | ||
| 2146 | out2: | 2131 | out2: |
| 2147 | ceph_msg_put(m); | 2132 | ceph_msg_put(m); |
| 2148 | out: | 2133 | out: |
| 2149 | pr_err("msg_new can't create type %d len %d\n", type, front_len); | 2134 | pr_err("msg_new can't create type %d front %d\n", type, front_len); |
| 2150 | return ERR_PTR(-ENOMEM); | 2135 | return NULL; |
| 2151 | } | 2136 | } |
| 2152 | 2137 | ||
| 2153 | /* | 2138 | /* |
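A minimal sketch of the calling convention this hunk establishes: ceph_msg_new() now takes a gfp_t and reports failure as NULL rather than an ERR_PTR. The helper name and front length below are hypothetical, not part of the patch:

	/* hypothetical caller of the reworked ceph_msg_new() */
	static int example_msg_alloc(void)
	{
		struct ceph_msg *msg;

		msg = ceph_msg_new(CEPH_MSG_STATFS, 128, GFP_NOFS);
		if (!msg)		/* failure is NULL now, not an ERR_PTR */
			return -ENOMEM;

		/* ... fill msg->front.iov_base, hand off via ceph_con_send() ... */

		ceph_msg_put(msg);	/* drop the initial kref from ceph_msg_new() */
		return 0;
	}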
| @@ -2190,29 +2175,25 @@ static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con, | |||
| 2190 | mutex_unlock(&con->mutex); | 2175 | mutex_unlock(&con->mutex); |
| 2191 | msg = con->ops->alloc_msg(con, hdr, skip); | 2176 | msg = con->ops->alloc_msg(con, hdr, skip); |
| 2192 | mutex_lock(&con->mutex); | 2177 | mutex_lock(&con->mutex); |
| 2193 | if (IS_ERR(msg)) | 2178 | if (!msg || *skip) |
| 2194 | return msg; | ||
| 2195 | |||
| 2196 | if (*skip) | ||
| 2197 | return NULL; | 2179 | return NULL; |
| 2198 | } | 2180 | } |
| 2199 | if (!msg) { | 2181 | if (!msg) { |
| 2200 | *skip = 0; | 2182 | *skip = 0; |
| 2201 | msg = ceph_msg_new(type, front_len, 0, 0, NULL); | 2183 | msg = ceph_msg_new(type, front_len, GFP_NOFS); |
| 2202 | if (!msg) { | 2184 | if (!msg) { |
| 2203 | pr_err("unable to allocate msg type %d len %d\n", | 2185 | pr_err("unable to allocate msg type %d len %d\n", |
| 2204 | type, front_len); | 2186 | type, front_len); |
| 2205 | return ERR_PTR(-ENOMEM); | 2187 | return NULL; |
| 2206 | } | 2188 | } |
| 2207 | } | 2189 | } |
| 2208 | memcpy(&msg->hdr, &con->in_hdr, sizeof(con->in_hdr)); | 2190 | memcpy(&msg->hdr, &con->in_hdr, sizeof(con->in_hdr)); |
| 2209 | 2191 | ||
| 2210 | if (middle_len) { | 2192 | if (middle_len && !msg->middle) { |
| 2211 | ret = ceph_alloc_middle(con, msg); | 2193 | ret = ceph_alloc_middle(con, msg); |
| 2212 | |||
| 2213 | if (ret < 0) { | 2194 | if (ret < 0) { |
| 2214 | ceph_msg_put(msg); | 2195 | ceph_msg_put(msg); |
| 2215 | return msg; | 2196 | return NULL; |
| 2216 | } | 2197 | } |
| 2217 | } | 2198 | } |
| 2218 | 2199 | ||
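The ops->alloc_msg contract after this hunk: the callback returns a message to read into, or NULL with *skip set when the incoming frame should be read and discarded. A hedged sketch of a conforming callback (names hypothetical; it mirrors the mon_alloc_msg() changes further down):

	static struct ceph_msg *example_alloc_msg_cb(struct ceph_connection *con,
						     struct ceph_msg_header *hdr,
						     int *skip)
	{
		int type = le16_to_cpu(hdr->type);
		int front_len = le32_to_cpu(hdr->front_len);

		if (type != CEPH_MSG_STATFS_REPLY) {
			*skip = 1;	/* messenger reads and discards the frame */
			return NULL;
		}
		*skip = 0;
		return ceph_msg_new(type, front_len, GFP_NOFS);	/* NULL on OOM */
	}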
diff --git a/fs/ceph/messenger.h b/fs/ceph/messenger.h index a5caf91cc971..00a9430b1ffc 100644 --- a/fs/ceph/messenger.h +++ b/fs/ceph/messenger.h | |||
| @@ -49,10 +49,8 @@ struct ceph_connection_operations { | |||
| 49 | int *skip); | 49 | int *skip); |
| 50 | }; | 50 | }; |
| 51 | 51 | ||
| 52 | extern const char *ceph_name_type_str(int t); | ||
| 53 | |||
| 54 | /* use format string %s%d */ | 52 | /* use format string %s%d */ |
| 55 | #define ENTITY_NAME(n) ceph_name_type_str((n).type), le64_to_cpu((n).num) | 53 | #define ENTITY_NAME(n) ceph_entity_type_name((n).type), le64_to_cpu((n).num) |
| 56 | 54 | ||
| 57 | struct ceph_messenger { | 55 | struct ceph_messenger { |
| 58 | struct ceph_entity_inst inst; /* my name+address */ | 56 | struct ceph_entity_inst inst; /* my name+address */ |
| @@ -144,6 +142,7 @@ struct ceph_connection { | |||
| 144 | struct ceph_entity_addr peer_addr; /* peer address */ | 142 | struct ceph_entity_addr peer_addr; /* peer address */ |
| 145 | struct ceph_entity_name peer_name; /* peer name */ | 143 | struct ceph_entity_name peer_name; /* peer name */ |
| 146 | struct ceph_entity_addr peer_addr_for_me; | 144 | struct ceph_entity_addr peer_addr_for_me; |
| 145 | unsigned peer_features; | ||
| 147 | u32 connect_seq; /* identify the most recent connection | 146 | u32 connect_seq; /* identify the most recent connection |
| 148 | attempt for this connection, client */ | 147 | attempt for this connection, client */ |
| 149 | u32 peer_global_seq; /* peer's global seq for this connection */ | 148 | u32 peer_global_seq; /* peer's global seq for this connection */ |
| @@ -158,7 +157,6 @@ struct ceph_connection { | |||
| 158 | struct list_head out_queue; | 157 | struct list_head out_queue; |
| 159 | struct list_head out_sent; /* sending or sent but unacked */ | 158 | struct list_head out_sent; /* sending or sent but unacked */ |
| 160 | u64 out_seq; /* last message queued for send */ | 159 | u64 out_seq; /* last message queued for send */ |
| 161 | u64 out_seq_sent; /* last message sent */ | ||
| 162 | bool out_keepalive_pending; | 160 | bool out_keepalive_pending; |
| 163 | 161 | ||
| 164 | u64 in_seq, in_seq_acked; /* last message received, acked */ | 162 | u64 in_seq, in_seq_acked; /* last message received, acked */ |
| @@ -234,9 +232,7 @@ extern void ceph_con_keepalive(struct ceph_connection *con); | |||
| 234 | extern struct ceph_connection *ceph_con_get(struct ceph_connection *con); | 232 | extern struct ceph_connection *ceph_con_get(struct ceph_connection *con); |
| 235 | extern void ceph_con_put(struct ceph_connection *con); | 233 | extern void ceph_con_put(struct ceph_connection *con); |
| 236 | 234 | ||
| 237 | extern struct ceph_msg *ceph_msg_new(int type, int front_len, | 235 | extern struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags); |
| 238 | int page_len, int page_off, | ||
| 239 | struct page **pages); | ||
| 240 | extern void ceph_msg_kfree(struct ceph_msg *m); | 236 | extern void ceph_msg_kfree(struct ceph_msg *m); |
| 241 | 237 | ||
| 242 | 238 | ||
diff --git a/fs/ceph/mon_client.c b/fs/ceph/mon_client.c index 8fdc011ca956..f6510a476e7e 100644 --- a/fs/ceph/mon_client.c +++ b/fs/ceph/mon_client.c | |||
| @@ -28,7 +28,7 @@ | |||
| 28 | * resend any outstanding requests. | 28 | * resend any outstanding requests. |
| 29 | */ | 29 | */ |
| 30 | 30 | ||
| 31 | const static struct ceph_connection_operations mon_con_ops; | 31 | static const struct ceph_connection_operations mon_con_ops; |
| 32 | 32 | ||
| 33 | static int __validate_auth(struct ceph_mon_client *monc); | 33 | static int __validate_auth(struct ceph_mon_client *monc); |
| 34 | 34 | ||
| @@ -104,6 +104,7 @@ static void __send_prepared_auth_request(struct ceph_mon_client *monc, int len) | |||
| 104 | monc->pending_auth = 1; | 104 | monc->pending_auth = 1; |
| 105 | monc->m_auth->front.iov_len = len; | 105 | monc->m_auth->front.iov_len = len; |
| 106 | monc->m_auth->hdr.front_len = cpu_to_le32(len); | 106 | monc->m_auth->hdr.front_len = cpu_to_le32(len); |
| 107 | ceph_con_revoke(monc->con, monc->m_auth); | ||
| 107 | ceph_msg_get(monc->m_auth); /* keep our ref */ | 108 | ceph_msg_get(monc->m_auth); /* keep our ref */ |
| 108 | ceph_con_send(monc->con, monc->m_auth); | 109 | ceph_con_send(monc->con, monc->m_auth); |
| 109 | } | 110 | } |
| @@ -187,16 +188,12 @@ static void __send_subscribe(struct ceph_mon_client *monc) | |||
| 187 | monc->want_next_osdmap); | 188 | monc->want_next_osdmap); |
| 188 | if ((__sub_expired(monc) && !monc->sub_sent) || | 189 | if ((__sub_expired(monc) && !monc->sub_sent) || |
| 189 | monc->want_next_osdmap == 1) { | 190 | monc->want_next_osdmap == 1) { |
| 190 | struct ceph_msg *msg; | 191 | struct ceph_msg *msg = monc->m_subscribe; |
| 191 | struct ceph_mon_subscribe_item *i; | 192 | struct ceph_mon_subscribe_item *i; |
| 192 | void *p, *end; | 193 | void *p, *end; |
| 193 | 194 | ||
| 194 | msg = ceph_msg_new(CEPH_MSG_MON_SUBSCRIBE, 96, 0, 0, NULL); | ||
| 195 | if (!msg) | ||
| 196 | return; | ||
| 197 | |||
| 198 | p = msg->front.iov_base; | 195 | p = msg->front.iov_base; |
| 199 | end = p + msg->front.iov_len; | 196 | end = p + msg->front_max; |
| 200 | 197 | ||
| 201 | dout("__send_subscribe to 'mdsmap' %u+\n", | 198 | dout("__send_subscribe to 'mdsmap' %u+\n", |
| 202 | (unsigned)monc->have_mdsmap); | 199 | (unsigned)monc->have_mdsmap); |
| @@ -226,7 +223,8 @@ static void __send_subscribe(struct ceph_mon_client *monc) | |||
| 226 | 223 | ||
| 227 | msg->front.iov_len = p - msg->front.iov_base; | 224 | msg->front.iov_len = p - msg->front.iov_base; |
| 228 | msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); | 225 | msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); |
| 229 | ceph_con_send(monc->con, msg); | 226 | ceph_con_revoke(monc->con, msg); |
| 227 | ceph_con_send(monc->con, ceph_msg_get(msg)); | ||
| 230 | 228 | ||
| 231 | monc->sub_sent = jiffies | 1; /* never 0 */ | 229 | monc->sub_sent = jiffies | 1; /* never 0 */ |
| 232 | } | 230 | } |
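The revoke-before-send pair above is what makes a single long-lived, refcounted message safe to retransmit: any copy still queued from an earlier attempt is pulled back before a fresh reference is queued. The pattern in isolation (hypothetical helper):

	static void example_resend(struct ceph_connection *con, struct ceph_msg *msg)
	{
		ceph_con_revoke(con, msg);		/* drop any still-queued copy */
		ceph_con_send(con, ceph_msg_get(msg));	/* queue a fresh reference */
	}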
| @@ -353,14 +351,14 @@ out: | |||
| 353 | /* | 351 | /* |
| 354 | * statfs | 352 | * statfs |
| 355 | */ | 353 | */ |
| 356 | static struct ceph_mon_statfs_request *__lookup_statfs( | 354 | static struct ceph_mon_generic_request *__lookup_generic_req( |
| 357 | struct ceph_mon_client *monc, u64 tid) | 355 | struct ceph_mon_client *monc, u64 tid) |
| 358 | { | 356 | { |
| 359 | struct ceph_mon_statfs_request *req; | 357 | struct ceph_mon_generic_request *req; |
| 360 | struct rb_node *n = monc->statfs_request_tree.rb_node; | 358 | struct rb_node *n = monc->generic_request_tree.rb_node; |
| 361 | 359 | ||
| 362 | while (n) { | 360 | while (n) { |
| 363 | req = rb_entry(n, struct ceph_mon_statfs_request, node); | 361 | req = rb_entry(n, struct ceph_mon_generic_request, node); |
| 364 | if (tid < req->tid) | 362 | if (tid < req->tid) |
| 365 | n = n->rb_left; | 363 | n = n->rb_left; |
| 366 | else if (tid > req->tid) | 364 | else if (tid > req->tid) |
| @@ -371,16 +369,16 @@ static struct ceph_mon_statfs_request *__lookup_statfs( | |||
| 371 | return NULL; | 369 | return NULL; |
| 372 | } | 370 | } |
| 373 | 371 | ||
| 374 | static void __insert_statfs(struct ceph_mon_client *monc, | 372 | static void __insert_generic_request(struct ceph_mon_client *monc, |
| 375 | struct ceph_mon_statfs_request *new) | 373 | struct ceph_mon_generic_request *new) |
| 376 | { | 374 | { |
| 377 | struct rb_node **p = &monc->statfs_request_tree.rb_node; | 375 | struct rb_node **p = &monc->generic_request_tree.rb_node; |
| 378 | struct rb_node *parent = NULL; | 376 | struct rb_node *parent = NULL; |
| 379 | struct ceph_mon_statfs_request *req = NULL; | 377 | struct ceph_mon_generic_request *req = NULL; |
| 380 | 378 | ||
| 381 | while (*p) { | 379 | while (*p) { |
| 382 | parent = *p; | 380 | parent = *p; |
| 383 | req = rb_entry(parent, struct ceph_mon_statfs_request, node); | 381 | req = rb_entry(parent, struct ceph_mon_generic_request, node); |
| 384 | if (new->tid < req->tid) | 382 | if (new->tid < req->tid) |
| 385 | p = &(*p)->rb_left; | 383 | p = &(*p)->rb_left; |
| 386 | else if (new->tid > req->tid) | 384 | else if (new->tid > req->tid) |
| @@ -390,113 +388,157 @@ static void __insert_statfs(struct ceph_mon_client *monc, | |||
| 390 | } | 388 | } |
| 391 | 389 | ||
| 392 | rb_link_node(&new->node, parent, p); | 390 | rb_link_node(&new->node, parent, p); |
| 393 | rb_insert_color(&new->node, &monc->statfs_request_tree); | 391 | rb_insert_color(&new->node, &monc->generic_request_tree); |
| 392 | } | ||
| 393 | |||
| 394 | static void release_generic_request(struct kref *kref) | ||
| 395 | { | ||
| 396 | struct ceph_mon_generic_request *req = | ||
| 397 | container_of(kref, struct ceph_mon_generic_request, kref); | ||
| 398 | |||
| 399 | if (req->reply) | ||
| 400 | ceph_msg_put(req->reply); | ||
| 401 | if (req->request) | ||
| 402 | ceph_msg_put(req->request); | ||
| 403 | } | ||
| 404 | |||
| 405 | static void put_generic_request(struct ceph_mon_generic_request *req) | ||
| 406 | { | ||
| 407 | kref_put(&req->kref, release_generic_request); | ||
| 408 | } | ||
| 409 | |||
| 410 | static void get_generic_request(struct ceph_mon_generic_request *req) | ||
| 411 | { | ||
| 412 | kref_get(&req->kref); | ||
| 413 | } | ||
| 414 | |||
| 415 | static struct ceph_msg *get_generic_reply(struct ceph_connection *con, | ||
| 416 | struct ceph_msg_header *hdr, | ||
| 417 | int *skip) | ||
| 418 | { | ||
| 419 | struct ceph_mon_client *monc = con->private; | ||
| 420 | struct ceph_mon_generic_request *req; | ||
| 421 | u64 tid = le64_to_cpu(hdr->tid); | ||
| 422 | struct ceph_msg *m; | ||
| 423 | |||
| 424 | mutex_lock(&monc->mutex); | ||
| 425 | req = __lookup_generic_req(monc, tid); | ||
| 426 | if (!req) { | ||
| 427 | dout("get_generic_reply %lld dne\n", tid); | ||
| 428 | *skip = 1; | ||
| 429 | m = NULL; | ||
| 430 | } else { | ||
| 431 | dout("get_generic_reply %lld got %p\n", tid, req->reply); | ||
| 432 | m = ceph_msg_get(req->reply); | ||
| 433 | /* | ||
| 434 | * we don't need to track the connection reading into | ||
| 435 | * this reply because we only have one open connection | ||
| 436 | * at a time, ever. | ||
| 437 | */ | ||
| 438 | } | ||
| 439 | mutex_unlock(&monc->mutex); | ||
| 440 | return m; | ||
| 394 | } | 441 | } |
| 395 | 442 | ||
| 396 | static void handle_statfs_reply(struct ceph_mon_client *monc, | 443 | static void handle_statfs_reply(struct ceph_mon_client *monc, |
| 397 | struct ceph_msg *msg) | 444 | struct ceph_msg *msg) |
| 398 | { | 445 | { |
| 399 | struct ceph_mon_statfs_request *req; | 446 | struct ceph_mon_generic_request *req; |
| 400 | struct ceph_mon_statfs_reply *reply = msg->front.iov_base; | 447 | struct ceph_mon_statfs_reply *reply = msg->front.iov_base; |
| 401 | u64 tid; | 448 | u64 tid = le64_to_cpu(msg->hdr.tid); |
| 402 | 449 | ||
| 403 | if (msg->front.iov_len != sizeof(*reply)) | 450 | if (msg->front.iov_len != sizeof(*reply)) |
| 404 | goto bad; | 451 | goto bad; |
| 405 | tid = le64_to_cpu(msg->hdr.tid); | ||
| 406 | dout("handle_statfs_reply %p tid %llu\n", msg, tid); | 452 | dout("handle_statfs_reply %p tid %llu\n", msg, tid); |
| 407 | 453 | ||
| 408 | mutex_lock(&monc->mutex); | 454 | mutex_lock(&monc->mutex); |
| 409 | req = __lookup_statfs(monc, tid); | 455 | req = __lookup_generic_req(monc, tid); |
| 410 | if (req) { | 456 | if (req) { |
| 411 | *req->buf = reply->st; | 457 | *(struct ceph_statfs *)req->buf = reply->st; |
| 412 | req->result = 0; | 458 | req->result = 0; |
| 459 | get_generic_request(req); | ||
| 413 | } | 460 | } |
| 414 | mutex_unlock(&monc->mutex); | 461 | mutex_unlock(&monc->mutex); |
| 415 | if (req) | 462 | if (req) { |
| 416 | complete(&req->completion); | 463 | complete(&req->completion); |
| 464 | put_generic_request(req); | ||
| 465 | } | ||
| 417 | return; | 466 | return; |
| 418 | 467 | ||
| 419 | bad: | 468 | bad: |
| 420 | pr_err("corrupt statfs reply, no tid\n"); | 469 | pr_err("corrupt generic reply, no tid\n"); |
| 421 | ceph_msg_dump(msg); | 470 | ceph_msg_dump(msg); |
| 422 | } | 471 | } |
| 423 | 472 | ||
| 424 | /* | 473 | /* |
| 425 | * (re)send a statfs request | 474 | * Do a synchronous statfs(). |
| 426 | */ | 475 | */ |
| 427 | static int send_statfs(struct ceph_mon_client *monc, | 476 | int ceph_monc_do_statfs(struct ceph_mon_client *monc, struct ceph_statfs *buf) |
| 428 | struct ceph_mon_statfs_request *req) | ||
| 429 | { | 477 | { |
| 430 | struct ceph_msg *msg; | 478 | struct ceph_mon_generic_request *req; |
| 431 | struct ceph_mon_statfs *h; | 479 | struct ceph_mon_statfs *h; |
| 480 | int err; | ||
| 432 | 481 | ||
| 433 | dout("send_statfs tid %llu\n", req->tid); | 482 | req = kzalloc(sizeof(*req), GFP_NOFS); |
| 434 | msg = ceph_msg_new(CEPH_MSG_STATFS, sizeof(*h), 0, 0, NULL); | 483 | if (!req) |
| 435 | if (IS_ERR(msg)) | 484 | return -ENOMEM; |
| 436 | return PTR_ERR(msg); | 485 | |
| 437 | req->request = msg; | 486 | kref_init(&req->kref); |
| 438 | msg->hdr.tid = cpu_to_le64(req->tid); | 487 | req->buf = buf; |
| 439 | h = msg->front.iov_base; | 488 | init_completion(&req->completion); |
| 489 | |||
| 490 | err = -ENOMEM; | ||
| 491 | req->request = ceph_msg_new(CEPH_MSG_STATFS, sizeof(*h), GFP_NOFS); | ||
| 492 | if (!req->request) | ||
| 493 | goto out; | ||
| 494 | req->reply = ceph_msg_new(CEPH_MSG_STATFS_REPLY, 1024, GFP_NOFS); | ||
| 495 | if (!req->reply) | ||
| 496 | goto out; | ||
| 497 | |||
| 498 | /* fill out request */ | ||
| 499 | h = req->request->front.iov_base; | ||
| 440 | h->monhdr.have_version = 0; | 500 | h->monhdr.have_version = 0; |
| 441 | h->monhdr.session_mon = cpu_to_le16(-1); | 501 | h->monhdr.session_mon = cpu_to_le16(-1); |
| 442 | h->monhdr.session_mon_tid = 0; | 502 | h->monhdr.session_mon_tid = 0; |
| 443 | h->fsid = monc->monmap->fsid; | 503 | h->fsid = monc->monmap->fsid; |
| 444 | ceph_con_send(monc->con, msg); | ||
| 445 | return 0; | ||
| 446 | } | ||
| 447 | |||
| 448 | /* | ||
| 449 | * Do a synchronous statfs(). | ||
| 450 | */ | ||
| 451 | int ceph_monc_do_statfs(struct ceph_mon_client *monc, struct ceph_statfs *buf) | ||
| 452 | { | ||
| 453 | struct ceph_mon_statfs_request req; | ||
| 454 | int err; | ||
| 455 | |||
| 456 | req.buf = buf; | ||
| 457 | init_completion(&req.completion); | ||
| 458 | |||
| 459 | /* allocate memory for reply */ | ||
| 460 | err = ceph_msgpool_resv(&monc->msgpool_statfs_reply, 1); | ||
| 461 | if (err) | ||
| 462 | return err; | ||
| 463 | 504 | ||
| 464 | /* register request */ | 505 | /* register request */ |
| 465 | mutex_lock(&monc->mutex); | 506 | mutex_lock(&monc->mutex); |
| 466 | req.tid = ++monc->last_tid; | 507 | req->tid = ++monc->last_tid; |
| 467 | req.last_attempt = jiffies; | 508 | req->request->hdr.tid = cpu_to_le64(req->tid); |
| 468 | req.delay = BASE_DELAY_INTERVAL; | 509 | __insert_generic_request(monc, req); |
| 469 | __insert_statfs(monc, &req); | 510 | monc->num_generic_requests++; |
| 470 | monc->num_statfs_requests++; | ||
| 471 | mutex_unlock(&monc->mutex); | 511 | mutex_unlock(&monc->mutex); |
| 472 | 512 | ||
| 473 | /* send request and wait */ | 513 | /* send request and wait */ |
| 474 | err = send_statfs(monc, &req); | 514 | ceph_con_send(monc->con, ceph_msg_get(req->request)); |
| 475 | if (!err) | 515 | err = wait_for_completion_interruptible(&req->completion); |
| 476 | err = wait_for_completion_interruptible(&req.completion); | ||
| 477 | 516 | ||
| 478 | mutex_lock(&monc->mutex); | 517 | mutex_lock(&monc->mutex); |
| 479 | rb_erase(&req.node, &monc->statfs_request_tree); | 518 | rb_erase(&req->node, &monc->generic_request_tree); |
| 480 | monc->num_statfs_requests--; | 519 | monc->num_generic_requests--; |
| 481 | ceph_msgpool_resv(&monc->msgpool_statfs_reply, -1); | ||
| 482 | mutex_unlock(&monc->mutex); | 520 | mutex_unlock(&monc->mutex); |
| 483 | 521 | ||
| 484 | if (!err) | 522 | if (!err) |
| 485 | err = req.result; | 523 | err = req->result; |
| 524 | |||
| 525 | out: | ||
| 526 | kref_put(&req->kref, release_generic_request); | ||
| 486 | return err; | 527 | return err; |
| 487 | } | 528 | } |
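Caller-side use of the synchronous path stays simple despite the generic-request plumbing; a minimal sketch (hypothetical caller):

	static int example_statfs_caller(struct ceph_mon_client *monc)
	{
		struct ceph_statfs st;
		int err;

		err = ceph_monc_do_statfs(monc, &st);	/* blocks until the reply */
		if (err < 0)
			return err;
		/* st now holds the reply copied in handle_statfs_reply() */
		return 0;
	}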
| 488 | 529 | ||
| 489 | /* | 530 | /* |
| 490 | * Resend pending statfs requests. | 531 | * Resend pending statfs requests. |
| 491 | */ | 532 | */ |
| 492 | static void __resend_statfs(struct ceph_mon_client *monc) | 533 | static void __resend_generic_request(struct ceph_mon_client *monc) |
| 493 | { | 534 | { |
| 494 | struct ceph_mon_statfs_request *req; | 535 | struct ceph_mon_generic_request *req; |
| 495 | struct rb_node *p; | 536 | struct rb_node *p; |
| 496 | 537 | ||
| 497 | for (p = rb_first(&monc->statfs_request_tree); p; p = rb_next(p)) { | 538 | for (p = rb_first(&monc->generic_request_tree); p; p = rb_next(p)) { |
| 498 | req = rb_entry(p, struct ceph_mon_statfs_request, node); | 539 | req = rb_entry(p, struct ceph_mon_generic_request, node); |
| 499 | send_statfs(monc, req); | 540 | ceph_con_revoke(monc->con, req->request); |
| 541 | ceph_con_send(monc->con, ceph_msg_get(req->request)); | ||
| 500 | } | 542 | } |
| 501 | } | 543 | } |
| 502 | 544 | ||
| @@ -586,26 +628,26 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl) | |||
| 586 | CEPH_ENTITY_TYPE_AUTH | CEPH_ENTITY_TYPE_MON | | 628 | CEPH_ENTITY_TYPE_AUTH | CEPH_ENTITY_TYPE_MON | |
| 587 | CEPH_ENTITY_TYPE_OSD | CEPH_ENTITY_TYPE_MDS; | 629 | CEPH_ENTITY_TYPE_OSD | CEPH_ENTITY_TYPE_MDS; |
| 588 | 630 | ||
| 589 | /* msg pools */ | 631 | /* msgs */ |
| 590 | err = ceph_msgpool_init(&monc->msgpool_subscribe_ack, | 632 | err = -ENOMEM; |
| 591 | sizeof(struct ceph_mon_subscribe_ack), 1, false); | 633 | monc->m_subscribe_ack = ceph_msg_new(CEPH_MSG_MON_SUBSCRIBE_ACK, |
| 592 | if (err < 0) | 634 | sizeof(struct ceph_mon_subscribe_ack), |
| 635 | GFP_NOFS); | ||
| 636 | if (!monc->m_subscribe_ack) | ||
| 593 | goto out_monmap; | 637 | goto out_monmap; |
| 594 | err = ceph_msgpool_init(&monc->msgpool_statfs_reply, | 638 | |
| 595 | sizeof(struct ceph_mon_statfs_reply), 0, false); | 639 | monc->m_subscribe = ceph_msg_new(CEPH_MSG_MON_SUBSCRIBE, 96, GFP_NOFS); |
| 596 | if (err < 0) | 640 | if (!monc->m_subscribe) |
| 597 | goto out_pool1; | 641 | goto out_subscribe_ack; |
| 598 | err = ceph_msgpool_init(&monc->msgpool_auth_reply, 4096, 1, false); | 642 | |
| 599 | if (err < 0) | 643 | monc->m_auth_reply = ceph_msg_new(CEPH_MSG_AUTH_REPLY, 4096, GFP_NOFS); |
| 600 | goto out_pool2; | 644 | if (!monc->m_auth_reply) |
| 601 | 645 | goto out_subscribe; | |
| 602 | monc->m_auth = ceph_msg_new(CEPH_MSG_AUTH, 4096, 0, 0, NULL); | 646 | |
| 647 | monc->m_auth = ceph_msg_new(CEPH_MSG_AUTH, 4096, GFP_NOFS); | ||
| 603 | monc->pending_auth = 0; | 648 | monc->pending_auth = 0; |
| 604 | if (IS_ERR(monc->m_auth)) { | 649 | if (!monc->m_auth) |
| 605 | err = PTR_ERR(monc->m_auth); | 650 | goto out_auth_reply; |
| 606 | monc->m_auth = NULL; | ||
| 607 | goto out_pool3; | ||
| 608 | } | ||
| 609 | 651 | ||
| 610 | monc->cur_mon = -1; | 652 | monc->cur_mon = -1; |
| 611 | monc->hunting = true; | 653 | monc->hunting = true; |
| @@ -613,8 +655,8 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl) | |||
| 613 | monc->sub_sent = 0; | 655 | monc->sub_sent = 0; |
| 614 | 656 | ||
| 615 | INIT_DELAYED_WORK(&monc->delayed_work, delayed_work); | 657 | INIT_DELAYED_WORK(&monc->delayed_work, delayed_work); |
| 616 | monc->statfs_request_tree = RB_ROOT; | 658 | monc->generic_request_tree = RB_ROOT; |
| 617 | monc->num_statfs_requests = 0; | 659 | monc->num_generic_requests = 0; |
| 618 | monc->last_tid = 0; | 660 | monc->last_tid = 0; |
| 619 | 661 | ||
| 620 | monc->have_mdsmap = 0; | 662 | monc->have_mdsmap = 0; |
| @@ -622,12 +664,12 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl) | |||
| 622 | monc->want_next_osdmap = 1; | 664 | monc->want_next_osdmap = 1; |
| 623 | return 0; | 665 | return 0; |
| 624 | 666 | ||
| 625 | out_pool3: | 667 | out_auth_reply: |
| 626 | ceph_msgpool_destroy(&monc->msgpool_auth_reply); | 668 | ceph_msg_put(monc->m_auth_reply); |
| 627 | out_pool2: | 669 | out_subscribe: |
| 628 | ceph_msgpool_destroy(&monc->msgpool_subscribe_ack); | 670 | ceph_msg_put(monc->m_subscribe); |
| 629 | out_pool1: | 671 | out_subscribe_ack: |
| 630 | ceph_msgpool_destroy(&monc->msgpool_statfs_reply); | 672 | ceph_msg_put(monc->m_subscribe_ack); |
| 631 | out_monmap: | 673 | out_monmap: |
| 632 | kfree(monc->monmap); | 674 | kfree(monc->monmap); |
| 633 | out: | 675 | out: |
| @@ -651,9 +693,9 @@ void ceph_monc_stop(struct ceph_mon_client *monc) | |||
| 651 | ceph_auth_destroy(monc->auth); | 693 | ceph_auth_destroy(monc->auth); |
| 652 | 694 | ||
| 653 | ceph_msg_put(monc->m_auth); | 695 | ceph_msg_put(monc->m_auth); |
| 654 | ceph_msgpool_destroy(&monc->msgpool_subscribe_ack); | 696 | ceph_msg_put(monc->m_auth_reply); |
| 655 | ceph_msgpool_destroy(&monc->msgpool_statfs_reply); | 697 | ceph_msg_put(monc->m_subscribe); |
| 656 | ceph_msgpool_destroy(&monc->msgpool_auth_reply); | 698 | ceph_msg_put(monc->m_subscribe_ack); |
| 657 | 699 | ||
| 658 | kfree(monc->monmap); | 700 | kfree(monc->monmap); |
| 659 | } | 701 | } |
| @@ -681,7 +723,7 @@ static void handle_auth_reply(struct ceph_mon_client *monc, | |||
| 681 | monc->client->msgr->inst.name.num = monc->auth->global_id; | 723 | monc->client->msgr->inst.name.num = monc->auth->global_id; |
| 682 | 724 | ||
| 683 | __send_subscribe(monc); | 725 | __send_subscribe(monc); |
| 684 | __resend_statfs(monc); | 726 | __resend_generic_request(monc); |
| 685 | } | 727 | } |
| 686 | mutex_unlock(&monc->mutex); | 728 | mutex_unlock(&monc->mutex); |
| 687 | } | 729 | } |
| @@ -770,18 +812,17 @@ static struct ceph_msg *mon_alloc_msg(struct ceph_connection *con, | |||
| 770 | 812 | ||
| 771 | switch (type) { | 813 | switch (type) { |
| 772 | case CEPH_MSG_MON_SUBSCRIBE_ACK: | 814 | case CEPH_MSG_MON_SUBSCRIBE_ACK: |
| 773 | m = ceph_msgpool_get(&monc->msgpool_subscribe_ack, front_len); | 815 | m = ceph_msg_get(monc->m_subscribe_ack); |
| 774 | break; | 816 | break; |
| 775 | case CEPH_MSG_STATFS_REPLY: | 817 | case CEPH_MSG_STATFS_REPLY: |
| 776 | m = ceph_msgpool_get(&monc->msgpool_statfs_reply, front_len); | 818 | return get_generic_reply(con, hdr, skip); |
| 777 | break; | ||
| 778 | case CEPH_MSG_AUTH_REPLY: | 819 | case CEPH_MSG_AUTH_REPLY: |
| 779 | m = ceph_msgpool_get(&monc->msgpool_auth_reply, front_len); | 820 | m = ceph_msg_get(monc->m_auth_reply); |
| 780 | break; | 821 | break; |
| 781 | case CEPH_MSG_MON_MAP: | 822 | case CEPH_MSG_MON_MAP: |
| 782 | case CEPH_MSG_MDS_MAP: | 823 | case CEPH_MSG_MDS_MAP: |
| 783 | case CEPH_MSG_OSD_MAP: | 824 | case CEPH_MSG_OSD_MAP: |
| 784 | m = ceph_msg_new(type, front_len, 0, 0, NULL); | 825 | m = ceph_msg_new(type, front_len, GFP_NOFS); |
| 785 | break; | 826 | break; |
| 786 | } | 827 | } |
| 787 | 828 | ||
| @@ -826,7 +867,7 @@ out: | |||
| 826 | mutex_unlock(&monc->mutex); | 867 | mutex_unlock(&monc->mutex); |
| 827 | } | 868 | } |
| 828 | 869 | ||
| 829 | const static struct ceph_connection_operations mon_con_ops = { | 870 | static const struct ceph_connection_operations mon_con_ops = { |
| 830 | .get = ceph_con_get, | 871 | .get = ceph_con_get, |
| 831 | .put = ceph_con_put, | 872 | .put = ceph_con_put, |
| 832 | .dispatch = dispatch, | 873 | .dispatch = dispatch, |
diff --git a/fs/ceph/mon_client.h b/fs/ceph/mon_client.h index b958ad5afa06..174d794321d0 100644 --- a/fs/ceph/mon_client.h +++ b/fs/ceph/mon_client.h | |||
| @@ -2,10 +2,10 @@ | |||
| 2 | #define _FS_CEPH_MON_CLIENT_H | 2 | #define _FS_CEPH_MON_CLIENT_H |
| 3 | 3 | ||
| 4 | #include <linux/completion.h> | 4 | #include <linux/completion.h> |
| 5 | #include <linux/kref.h> | ||
| 5 | #include <linux/rbtree.h> | 6 | #include <linux/rbtree.h> |
| 6 | 7 | ||
| 7 | #include "messenger.h" | 8 | #include "messenger.h" |
| 8 | #include "msgpool.h" | ||
| 9 | 9 | ||
| 10 | struct ceph_client; | 10 | struct ceph_client; |
| 11 | struct ceph_mount_args; | 11 | struct ceph_mount_args; |
| @@ -22,7 +22,7 @@ struct ceph_monmap { | |||
| 22 | }; | 22 | }; |
| 23 | 23 | ||
| 24 | struct ceph_mon_client; | 24 | struct ceph_mon_client; |
| 25 | struct ceph_mon_statfs_request; | 25 | struct ceph_mon_generic_request; |
| 26 | 26 | ||
| 27 | 27 | ||
| 28 | /* | 28 | /* |
| @@ -40,17 +40,19 @@ struct ceph_mon_request { | |||
| 40 | }; | 40 | }; |
| 41 | 41 | ||
| 42 | /* | 42 | /* |
| 43 | * statfs() is done a bit differently because we need to get data back | 43 | * ceph_mon_generic_request is being used for the statfs and poolop requests |
| 44 | * which are being done a bit differently because we need to get data back | ||
| 44 | * to the caller | 45 | * to the caller |
| 45 | */ | 46 | */ |
| 46 | struct ceph_mon_statfs_request { | 47 | struct ceph_mon_generic_request { |
| 48 | struct kref kref; | ||
| 47 | u64 tid; | 49 | u64 tid; |
| 48 | struct rb_node node; | 50 | struct rb_node node; |
| 49 | int result; | 51 | int result; |
| 50 | struct ceph_statfs *buf; | 52 | void *buf; |
| 51 | struct completion completion; | 53 | struct completion completion; |
| 52 | unsigned long last_attempt, delay; /* jiffies */ | ||
| 53 | struct ceph_msg *request; /* original request */ | 54 | struct ceph_msg *request; /* original request */ |
| 55 | struct ceph_msg *reply; /* and reply */ | ||
| 54 | }; | 56 | }; |
| 55 | 57 | ||
| 56 | struct ceph_mon_client { | 58 | struct ceph_mon_client { |
| @@ -61,7 +63,7 @@ struct ceph_mon_client { | |||
| 61 | struct delayed_work delayed_work; | 63 | struct delayed_work delayed_work; |
| 62 | 64 | ||
| 63 | struct ceph_auth_client *auth; | 65 | struct ceph_auth_client *auth; |
| 64 | struct ceph_msg *m_auth; | 66 | struct ceph_msg *m_auth, *m_auth_reply, *m_subscribe, *m_subscribe_ack; |
| 65 | int pending_auth; | 67 | int pending_auth; |
| 66 | 68 | ||
| 67 | bool hunting; | 69 | bool hunting; |
| @@ -70,14 +72,9 @@ struct ceph_mon_client { | |||
| 70 | struct ceph_connection *con; | 72 | struct ceph_connection *con; |
| 71 | bool have_fsid; | 73 | bool have_fsid; |
| 72 | 74 | ||
| 73 | /* msg pools */ | 75 | /* pending generic requests */ |
| 74 | struct ceph_msgpool msgpool_subscribe_ack; | 76 | struct rb_root generic_request_tree; |
| 75 | struct ceph_msgpool msgpool_statfs_reply; | 77 | int num_generic_requests; |
| 76 | struct ceph_msgpool msgpool_auth_reply; | ||
| 77 | |||
| 78 | /* pending statfs requests */ | ||
| 79 | struct rb_root statfs_request_tree; | ||
| 80 | int num_statfs_requests; | ||
| 81 | u64 last_tid; | 78 | u64 last_tid; |
| 82 | 79 | ||
| 83 | /* mds/osd map */ | 80 | /* mds/osd map */ |
diff --git a/fs/ceph/msgpool.c b/fs/ceph/msgpool.c index ca3b44a89f2d..dd65a6438131 100644 --- a/fs/ceph/msgpool.c +++ b/fs/ceph/msgpool.c | |||
| @@ -7,180 +7,58 @@ | |||
| 7 | 7 | ||
| 8 | #include "msgpool.h" | 8 | #include "msgpool.h" |
| 9 | 9 | ||
| 10 | /* | 10 | static void *alloc_fn(gfp_t gfp_mask, void *arg) |
| 11 | * We use msg pools to preallocate memory for messages we expect to | 11 | { |
| 12 | * receive over the wire, to avoid getting ourselves into OOM | 12 | struct ceph_msgpool *pool = arg; |
| 13 | * conditions at unexpected times. We take use a few different | 13 | void *p; |
| 14 | * strategies: | ||
| 15 | * | ||
| 16 | * - for request/response type interactions, we preallocate the | ||
| 17 | * memory needed for the response when we generate the request. | ||
| 18 | * | ||
| 19 | * - for messages we can receive at any time from the MDS, we preallocate | ||
| 20 | * a pool of messages we can re-use. | ||
| 21 | * | ||
| 22 | * - for writeback, we preallocate some number of messages to use for | ||
| 23 | * requests and their replies, so that we always make forward | ||
| 24 | * progress. | ||
| 25 | * | ||
| 26 | * The msgpool behaves like a mempool_t, but keeps preallocated | ||
| 27 | * ceph_msgs strung together on a list_head instead of using a pointer | ||
| 28 | * vector. This avoids vector reallocation when we adjust the number | ||
| 29 | * of preallocated items (which happens frequently). | ||
| 30 | */ | ||
| 31 | 14 | ||
| 15 | p = ceph_msg_new(0, pool->front_len, gfp_mask); | ||
| 16 | if (!p) | ||
| 17 | pr_err("msgpool %s alloc failed\n", pool->name); | ||
| 18 | return p; | ||
| 19 | } | ||
| 32 | 20 | ||
| 33 | /* | 21 | static void free_fn(void *element, void *arg) |
| 34 | * Allocate or release as necessary to meet our target pool size. | ||
| 35 | */ | ||
| 36 | static int __fill_msgpool(struct ceph_msgpool *pool) | ||
| 37 | { | 22 | { |
| 38 | struct ceph_msg *msg; | 23 | ceph_msg_put(element); |
| 39 | |||
| 40 | while (pool->num < pool->min) { | ||
| 41 | dout("fill_msgpool %p %d/%d allocating\n", pool, pool->num, | ||
| 42 | pool->min); | ||
| 43 | spin_unlock(&pool->lock); | ||
| 44 | msg = ceph_msg_new(0, pool->front_len, 0, 0, NULL); | ||
| 45 | spin_lock(&pool->lock); | ||
| 46 | if (IS_ERR(msg)) | ||
| 47 | return PTR_ERR(msg); | ||
| 48 | msg->pool = pool; | ||
| 49 | list_add(&msg->list_head, &pool->msgs); | ||
| 50 | pool->num++; | ||
| 51 | } | ||
| 52 | while (pool->num > pool->min) { | ||
| 53 | msg = list_first_entry(&pool->msgs, struct ceph_msg, list_head); | ||
| 54 | dout("fill_msgpool %p %d/%d releasing %p\n", pool, pool->num, | ||
| 55 | pool->min, msg); | ||
| 56 | list_del_init(&msg->list_head); | ||
| 57 | pool->num--; | ||
| 58 | ceph_msg_kfree(msg); | ||
| 59 | } | ||
| 60 | return 0; | ||
| 61 | } | 24 | } |
| 62 | 25 | ||
| 63 | int ceph_msgpool_init(struct ceph_msgpool *pool, | 26 | int ceph_msgpool_init(struct ceph_msgpool *pool, |
| 64 | int front_len, int min, bool blocking) | 27 | int front_len, int size, bool blocking, const char *name) |
| 65 | { | 28 | { |
| 66 | int ret; | ||
| 67 | |||
| 68 | dout("msgpool_init %p front_len %d min %d\n", pool, front_len, min); | ||
| 69 | spin_lock_init(&pool->lock); | ||
| 70 | pool->front_len = front_len; | 29 | pool->front_len = front_len; |
| 71 | INIT_LIST_HEAD(&pool->msgs); | 30 | pool->pool = mempool_create(size, alloc_fn, free_fn, pool); |
| 72 | pool->num = 0; | 31 | if (!pool->pool) |
| 73 | pool->min = min; | 32 | return -ENOMEM; |
| 74 | pool->blocking = blocking; | 33 | pool->name = name; |
| 75 | init_waitqueue_head(&pool->wait); | 34 | return 0; |
| 76 | |||
| 77 | spin_lock(&pool->lock); | ||
| 78 | ret = __fill_msgpool(pool); | ||
| 79 | spin_unlock(&pool->lock); | ||
| 80 | return ret; | ||
| 81 | } | 35 | } |
| 82 | 36 | ||
| 83 | void ceph_msgpool_destroy(struct ceph_msgpool *pool) | 37 | void ceph_msgpool_destroy(struct ceph_msgpool *pool) |
| 84 | { | 38 | { |
| 85 | dout("msgpool_destroy %p\n", pool); | 39 | mempool_destroy(pool->pool); |
| 86 | spin_lock(&pool->lock); | ||
| 87 | pool->min = 0; | ||
| 88 | __fill_msgpool(pool); | ||
| 89 | spin_unlock(&pool->lock); | ||
| 90 | } | 40 | } |
| 91 | 41 | ||
| 92 | int ceph_msgpool_resv(struct ceph_msgpool *pool, int delta) | 42 | struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool, |
| 43 | int front_len) | ||
| 93 | { | 44 | { |
| 94 | int ret; | 45 | if (front_len > pool->front_len) { |
| 95 | 46 | pr_err("msgpool_get pool %s need front %d, pool size is %d\n", | |
| 96 | spin_lock(&pool->lock); | 47 | pool->name, front_len, pool->front_len); |
| 97 | dout("msgpool_resv %p delta %d\n", pool, delta); | ||
| 98 | pool->min += delta; | ||
| 99 | ret = __fill_msgpool(pool); | ||
| 100 | spin_unlock(&pool->lock); | ||
| 101 | return ret; | ||
| 102 | } | ||
| 103 | |||
| 104 | struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool, int front_len) | ||
| 105 | { | ||
| 106 | wait_queue_t wait; | ||
| 107 | struct ceph_msg *msg; | ||
| 108 | |||
| 109 | if (front_len && front_len > pool->front_len) { | ||
| 110 | pr_err("msgpool_get pool %p need front %d, pool size is %d\n", | ||
| 111 | pool, front_len, pool->front_len); | ||
| 112 | WARN_ON(1); | 48 | WARN_ON(1); |
| 113 | 49 | ||
| 114 | /* try to alloc a fresh message */ | 50 | /* try to alloc a fresh message */ |
| 115 | msg = ceph_msg_new(0, front_len, 0, 0, NULL); | 51 | return ceph_msg_new(0, front_len, GFP_NOFS); |
| 116 | if (!IS_ERR(msg)) | ||
| 117 | return msg; | ||
| 118 | } | ||
| 119 | |||
| 120 | if (!front_len) | ||
| 121 | front_len = pool->front_len; | ||
| 122 | |||
| 123 | if (pool->blocking) { | ||
| 124 | /* mempool_t behavior; first try to alloc */ | ||
| 125 | msg = ceph_msg_new(0, front_len, 0, 0, NULL); | ||
| 126 | if (!IS_ERR(msg)) | ||
| 127 | return msg; | ||
| 128 | } | 52 | } |
| 129 | 53 | ||
| 130 | while (1) { | 54 | return mempool_alloc(pool->pool, GFP_NOFS); |
| 131 | spin_lock(&pool->lock); | ||
| 132 | if (likely(pool->num)) { | ||
| 133 | msg = list_entry(pool->msgs.next, struct ceph_msg, | ||
| 134 | list_head); | ||
| 135 | list_del_init(&msg->list_head); | ||
| 136 | pool->num--; | ||
| 137 | dout("msgpool_get %p got %p, now %d/%d\n", pool, msg, | ||
| 138 | pool->num, pool->min); | ||
| 139 | spin_unlock(&pool->lock); | ||
| 140 | return msg; | ||
| 141 | } | ||
| 142 | pr_err("msgpool_get %p now %d/%d, %s\n", pool, pool->num, | ||
| 143 | pool->min, pool->blocking ? "waiting" : "may fail"); | ||
| 144 | spin_unlock(&pool->lock); | ||
| 145 | |||
| 146 | if (!pool->blocking) { | ||
| 147 | WARN_ON(1); | ||
| 148 | |||
| 149 | /* maybe we can allocate it now? */ | ||
| 150 | msg = ceph_msg_new(0, front_len, 0, 0, NULL); | ||
| 151 | if (!IS_ERR(msg)) | ||
| 152 | return msg; | ||
| 153 | |||
| 154 | pr_err("msgpool_get %p empty + alloc failed\n", pool); | ||
| 155 | return ERR_PTR(-ENOMEM); | ||
| 156 | } | ||
| 157 | |||
| 158 | init_wait(&wait); | ||
| 159 | prepare_to_wait(&pool->wait, &wait, TASK_UNINTERRUPTIBLE); | ||
| 160 | schedule(); | ||
| 161 | finish_wait(&pool->wait, &wait); | ||
| 162 | } | ||
| 163 | } | 55 | } |
| 164 | 56 | ||
| 165 | void ceph_msgpool_put(struct ceph_msgpool *pool, struct ceph_msg *msg) | 57 | void ceph_msgpool_put(struct ceph_msgpool *pool, struct ceph_msg *msg) |
| 166 | { | 58 | { |
| 167 | spin_lock(&pool->lock); | 59 | /* reset msg front_len; user may have changed it */ |
| 168 | if (pool->num < pool->min) { | 60 | msg->front.iov_len = pool->front_len; |
| 169 | /* reset msg front_len; user may have changed it */ | 61 | msg->hdr.front_len = cpu_to_le32(pool->front_len); |
| 170 | msg->front.iov_len = pool->front_len; | ||
| 171 | msg->hdr.front_len = cpu_to_le32(pool->front_len); | ||
| 172 | 62 | ||
| 173 | kref_set(&msg->kref, 1); /* retake a single ref */ | 63 | kref_init(&msg->kref); /* retake single ref */ |
| 174 | list_add(&msg->list_head, &pool->msgs); | ||
| 175 | pool->num++; | ||
| 176 | dout("msgpool_put %p reclaim %p, now %d/%d\n", pool, msg, | ||
| 177 | pool->num, pool->min); | ||
| 178 | spin_unlock(&pool->lock); | ||
| 179 | wake_up(&pool->wait); | ||
| 180 | } else { | ||
| 181 | dout("msgpool_put %p drop %p, at %d/%d\n", pool, msg, | ||
| 182 | pool->num, pool->min); | ||
| 183 | spin_unlock(&pool->lock); | ||
| 184 | ceph_msg_kfree(msg); | ||
| 185 | } | ||
| 186 | } | 64 | } |
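Taken together, the rewritten pool reduces to four calls; a hedged usage sketch against the new signatures (pool name and sizes hypothetical):

	static int example_msgpool_use(void)
	{
		struct ceph_msgpool pool;
		struct ceph_msg *msg;
		int err;

		err = ceph_msgpool_init(&pool, 512, 4, true, "example");
		if (err < 0)
			return err;

		msg = ceph_msgpool_get(&pool, 0);	/* 0: accept pool->front_len */
		if (msg)
			ceph_msgpool_put(&pool, msg);	/* reset front_len, retake ref */

		ceph_msgpool_destroy(&pool);
		return 0;
	}

Backing the pool with mempool_t trades the hand-rolled list, spinlock, and waitqueue for the kernel's standard reservation semantics: preallocated elements are consumed only when a regular allocation fails.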
diff --git a/fs/ceph/msgpool.h b/fs/ceph/msgpool.h index bc834bfcd720..a362605f9368 100644 --- a/fs/ceph/msgpool.h +++ b/fs/ceph/msgpool.h | |||
| @@ -1,6 +1,7 @@ | |||
| 1 | #ifndef _FS_CEPH_MSGPOOL | 1 | #ifndef _FS_CEPH_MSGPOOL |
| 2 | #define _FS_CEPH_MSGPOOL | 2 | #define _FS_CEPH_MSGPOOL |
| 3 | 3 | ||
| 4 | #include <linux/mempool.h> | ||
| 4 | #include "messenger.h" | 5 | #include "messenger.h" |
| 5 | 6 | ||
| 6 | /* | 7 | /* |
| @@ -8,18 +9,15 @@ | |||
| 8 | * avoid unexpected OOM conditions. | 9 | * avoid unexpected OOM conditions. |
| 9 | */ | 10 | */ |
| 10 | struct ceph_msgpool { | 11 | struct ceph_msgpool { |
| 11 | spinlock_t lock; | 12 | const char *name; |
| 13 | mempool_t *pool; | ||
| 12 | int front_len; /* preallocated payload size */ | 14 | int front_len; /* preallocated payload size */ |
| 13 | struct list_head msgs; /* msgs in the pool; each has 1 ref */ | ||
| 14 | int num, min; /* cur, min # msgs in the pool */ | ||
| 15 | bool blocking; | ||
| 16 | wait_queue_head_t wait; | ||
| 17 | }; | 15 | }; |
| 18 | 16 | ||
| 19 | extern int ceph_msgpool_init(struct ceph_msgpool *pool, | 17 | extern int ceph_msgpool_init(struct ceph_msgpool *pool, |
| 20 | int front_len, int size, bool blocking); | 18 | int front_len, int size, bool blocking, |
| 19 | const char *name); | ||
| 21 | extern void ceph_msgpool_destroy(struct ceph_msgpool *pool); | 20 | extern void ceph_msgpool_destroy(struct ceph_msgpool *pool); |
| 22 | extern int ceph_msgpool_resv(struct ceph_msgpool *, int delta); | ||
| 23 | extern struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *, | 21 | extern struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *, |
| 24 | int front_len); | 22 | int front_len); |
| 25 | extern void ceph_msgpool_put(struct ceph_msgpool *, struct ceph_msg *); | 23 | extern void ceph_msgpool_put(struct ceph_msgpool *, struct ceph_msg *); |
diff --git a/fs/ceph/msgr.h b/fs/ceph/msgr.h index 8aaab414f3f8..892a0298dfdf 100644 --- a/fs/ceph/msgr.h +++ b/fs/ceph/msgr.h | |||
| @@ -50,7 +50,6 @@ struct ceph_entity_name { | |||
| 50 | #define CEPH_ENTITY_TYPE_MDS 0x02 | 50 | #define CEPH_ENTITY_TYPE_MDS 0x02 |
| 51 | #define CEPH_ENTITY_TYPE_OSD 0x04 | 51 | #define CEPH_ENTITY_TYPE_OSD 0x04 |
| 52 | #define CEPH_ENTITY_TYPE_CLIENT 0x08 | 52 | #define CEPH_ENTITY_TYPE_CLIENT 0x08 |
| 53 | #define CEPH_ENTITY_TYPE_ADMIN 0x10 | ||
| 54 | #define CEPH_ENTITY_TYPE_AUTH 0x20 | 53 | #define CEPH_ENTITY_TYPE_AUTH 0x20 |
| 55 | 54 | ||
| 56 | #define CEPH_ENTITY_TYPE_ANY 0xFF | 55 | #define CEPH_ENTITY_TYPE_ANY 0xFF |
| @@ -120,7 +119,7 @@ struct ceph_msg_connect_reply { | |||
| 120 | /* | 119 | /* |
| 121 | * message header | 120 | * message header |
| 122 | */ | 121 | */ |
| 123 | struct ceph_msg_header { | 122 | struct ceph_msg_header_old { |
| 124 | __le64 seq; /* message seq# for this session */ | 123 | __le64 seq; /* message seq# for this session */ |
| 125 | __le64 tid; /* transaction id */ | 124 | __le64 tid; /* transaction id */ |
| 126 | __le16 type; /* message type */ | 125 | __le16 type; /* message type */ |
| @@ -138,6 +137,24 @@ struct ceph_msg_header { | |||
| 138 | __le32 crc; /* header crc32c */ | 137 | __le32 crc; /* header crc32c */ |
| 139 | } __attribute__ ((packed)); | 138 | } __attribute__ ((packed)); |
| 140 | 139 | ||
| 140 | struct ceph_msg_header { | ||
| 141 | __le64 seq; /* message seq# for this session */ | ||
| 142 | __le64 tid; /* transaction id */ | ||
| 143 | __le16 type; /* message type */ | ||
| 144 | __le16 priority; /* priority. higher value == higher priority */ | ||
| 145 | __le16 version; /* version of message encoding */ | ||
| 146 | |||
| 147 | __le32 front_len; /* bytes in main payload */ | ||
| 148 | __le32 middle_len;/* bytes in middle payload */ | ||
| 149 | __le32 data_len; /* bytes of data payload */ | ||
| 150 | __le16 data_off; /* sender: include full offset; | ||
| 151 | receiver: mask against ~PAGE_MASK */ | ||
| 152 | |||
| 153 | struct ceph_entity_name src; | ||
| 154 | __le32 reserved; | ||
| 155 | __le32 crc; /* header crc32c */ | ||
| 156 | } __attribute__ ((packed)); | ||
| 157 | |||
| 141 | #define CEPH_MSG_PRIO_LOW 64 | 158 | #define CEPH_MSG_PRIO_LOW 64 |
| 142 | #define CEPH_MSG_PRIO_DEFAULT 127 | 159 | #define CEPH_MSG_PRIO_DEFAULT 127 |
| 143 | #define CEPH_MSG_PRIO_HIGH 196 | 160 | #define CEPH_MSG_PRIO_HIGH 196 |
diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c index 3514f71ff85f..afa7bb3895c4 100644 --- a/fs/ceph/osd_client.c +++ b/fs/ceph/osd_client.c | |||
| @@ -16,7 +16,7 @@ | |||
| 16 | #define OSD_OP_FRONT_LEN 4096 | 16 | #define OSD_OP_FRONT_LEN 4096 |
| 17 | #define OSD_OPREPLY_FRONT_LEN 512 | 17 | #define OSD_OPREPLY_FRONT_LEN 512 |
| 18 | 18 | ||
| 19 | const static struct ceph_connection_operations osd_con_ops; | 19 | static const struct ceph_connection_operations osd_con_ops; |
| 20 | static int __kick_requests(struct ceph_osd_client *osdc, | 20 | static int __kick_requests(struct ceph_osd_client *osdc, |
| 21 | struct ceph_osd *kickosd); | 21 | struct ceph_osd *kickosd); |
| 22 | 22 | ||
| @@ -147,7 +147,7 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, | |||
| 147 | req = kzalloc(sizeof(*req), GFP_NOFS); | 147 | req = kzalloc(sizeof(*req), GFP_NOFS); |
| 148 | } | 148 | } |
| 149 | if (req == NULL) | 149 | if (req == NULL) |
| 150 | return ERR_PTR(-ENOMEM); | 150 | return NULL; |
| 151 | 151 | ||
| 152 | req->r_osdc = osdc; | 152 | req->r_osdc = osdc; |
| 153 | req->r_mempool = use_mempool; | 153 | req->r_mempool = use_mempool; |
| @@ -164,10 +164,10 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, | |||
| 164 | msg = ceph_msgpool_get(&osdc->msgpool_op_reply, 0); | 164 | msg = ceph_msgpool_get(&osdc->msgpool_op_reply, 0); |
| 165 | else | 165 | else |
| 166 | msg = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, | 166 | msg = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, |
| 167 | OSD_OPREPLY_FRONT_LEN, 0, 0, NULL); | 167 | OSD_OPREPLY_FRONT_LEN, GFP_NOFS); |
| 168 | if (IS_ERR(msg)) { | 168 | if (!msg) { |
| 169 | ceph_osdc_put_request(req); | 169 | ceph_osdc_put_request(req); |
| 170 | return ERR_PTR(PTR_ERR(msg)); | 170 | return NULL; |
| 171 | } | 171 | } |
| 172 | req->r_reply = msg; | 172 | req->r_reply = msg; |
| 173 | 173 | ||
| @@ -178,10 +178,10 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, | |||
| 178 | if (use_mempool) | 178 | if (use_mempool) |
| 179 | msg = ceph_msgpool_get(&osdc->msgpool_op, 0); | 179 | msg = ceph_msgpool_get(&osdc->msgpool_op, 0); |
| 180 | else | 180 | else |
| 181 | msg = ceph_msg_new(CEPH_MSG_OSD_OP, msg_size, 0, 0, NULL); | 181 | msg = ceph_msg_new(CEPH_MSG_OSD_OP, msg_size, GFP_NOFS); |
| 182 | if (IS_ERR(msg)) { | 182 | if (!msg) { |
| 183 | ceph_osdc_put_request(req); | 183 | ceph_osdc_put_request(req); |
| 184 | return ERR_PTR(PTR_ERR(msg)); | 184 | return NULL; |
| 185 | } | 185 | } |
| 186 | msg->hdr.type = cpu_to_le16(CEPH_MSG_OSD_OP); | 186 | msg->hdr.type = cpu_to_le16(CEPH_MSG_OSD_OP); |
| 187 | memset(msg->front.iov_base, 0, msg->front.iov_len); | 187 | memset(msg->front.iov_base, 0, msg->front.iov_len); |
| @@ -715,7 +715,7 @@ static void handle_timeout(struct work_struct *work) | |||
| 715 | * should mark the osd as failed and we should find out about | 715 | * should mark the osd as failed and we should find out about |
| 716 | * it from an updated osd map. | 716 | * it from an updated osd map. |
| 717 | */ | 717 | */ |
| 718 | while (!list_empty(&osdc->req_lru)) { | 718 | while (timeout && !list_empty(&osdc->req_lru)) { |
| 719 | req = list_entry(osdc->req_lru.next, struct ceph_osd_request, | 719 | req = list_entry(osdc->req_lru.next, struct ceph_osd_request, |
| 720 | r_req_lru_item); | 720 | r_req_lru_item); |
| 721 | 721 | ||
| @@ -1078,6 +1078,7 @@ done: | |||
| 1078 | if (newmap) | 1078 | if (newmap) |
| 1079 | kick_requests(osdc, NULL); | 1079 | kick_requests(osdc, NULL); |
| 1080 | up_read(&osdc->map_sem); | 1080 | up_read(&osdc->map_sem); |
| 1081 | wake_up(&osdc->client->auth_wq); | ||
| 1081 | return; | 1082 | return; |
| 1082 | 1083 | ||
| 1083 | bad: | 1084 | bad: |
| @@ -1087,45 +1088,6 @@ bad: | |||
| 1087 | return; | 1088 | return; |
| 1088 | } | 1089 | } |
| 1089 | 1090 | ||
| 1090 | |||
| 1091 | /* | ||
| 1092 | * A read request prepares specific pages that data is to be read into. | ||
| 1093 | * When a message is being read off the wire, we call prepare_pages to | ||
| 1094 | * find those pages. | ||
| 1095 | * 0 = success, -1 failure. | ||
| 1096 | */ | ||
| 1097 | static int __prepare_pages(struct ceph_connection *con, | ||
| 1098 | struct ceph_msg_header *hdr, | ||
| 1099 | struct ceph_osd_request *req, | ||
| 1100 | u64 tid, | ||
| 1101 | struct ceph_msg *m) | ||
| 1102 | { | ||
| 1103 | struct ceph_osd *osd = con->private; | ||
| 1104 | struct ceph_osd_client *osdc; | ||
| 1105 | int ret = -1; | ||
| 1106 | int data_len = le32_to_cpu(hdr->data_len); | ||
| 1107 | unsigned data_off = le16_to_cpu(hdr->data_off); | ||
| 1108 | |||
| 1109 | int want = calc_pages_for(data_off & ~PAGE_MASK, data_len); | ||
| 1110 | |||
| 1111 | if (!osd) | ||
| 1112 | return -1; | ||
| 1113 | |||
| 1114 | osdc = osd->o_osdc; | ||
| 1115 | |||
| 1116 | dout("__prepare_pages on msg %p tid %llu, has %d pages, want %d\n", m, | ||
| 1117 | tid, req->r_num_pages, want); | ||
| 1118 | if (unlikely(req->r_num_pages < want)) | ||
| 1119 | goto out; | ||
| 1120 | m->pages = req->r_pages; | ||
| 1121 | m->nr_pages = req->r_num_pages; | ||
| 1122 | ret = 0; /* success */ | ||
| 1123 | out: | ||
| 1124 | BUG_ON(ret < 0 || m->nr_pages < want); | ||
| 1125 | |||
| 1126 | return ret; | ||
| 1127 | } | ||
| 1128 | |||
| 1129 | /* | 1091 | /* |
| 1130 | * Register request, send initial attempt. | 1092 | * Register request, send initial attempt. |
| 1131 | */ | 1093 | */ |
| @@ -1252,11 +1214,13 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client) | |||
| 1252 | if (!osdc->req_mempool) | 1214 | if (!osdc->req_mempool) |
| 1253 | goto out; | 1215 | goto out; |
| 1254 | 1216 | ||
| 1255 | err = ceph_msgpool_init(&osdc->msgpool_op, OSD_OP_FRONT_LEN, 10, true); | 1217 | err = ceph_msgpool_init(&osdc->msgpool_op, OSD_OP_FRONT_LEN, 10, true, |
| 1218 | "osd_op"); | ||
| 1256 | if (err < 0) | 1219 | if (err < 0) |
| 1257 | goto out_mempool; | 1220 | goto out_mempool; |
| 1258 | err = ceph_msgpool_init(&osdc->msgpool_op_reply, | 1221 | err = ceph_msgpool_init(&osdc->msgpool_op_reply, |
| 1259 | OSD_OPREPLY_FRONT_LEN, 10, true); | 1222 | OSD_OPREPLY_FRONT_LEN, 10, true, |
| 1223 | "osd_op_reply"); | ||
| 1260 | if (err < 0) | 1224 | if (err < 0) |
| 1261 | goto out_msgpool; | 1225 | goto out_msgpool; |
| 1262 | return 0; | 1226 | return 0; |
| @@ -1302,8 +1266,8 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc, | |||
| 1302 | CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, | 1266 | CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, |
| 1303 | NULL, 0, truncate_seq, truncate_size, NULL, | 1267 | NULL, 0, truncate_seq, truncate_size, NULL, |
| 1304 | false, 1); | 1268 | false, 1); |
| 1305 | if (IS_ERR(req)) | 1269 | if (!req) |
| 1306 | return PTR_ERR(req); | 1270 | return -ENOMEM; |
| 1307 | 1271 | ||
| 1308 | /* it may be a short read due to an object boundary */ | 1272 | /* it may be a short read due to an object boundary */ |
| 1309 | req->r_pages = pages; | 1273 | req->r_pages = pages; |
| @@ -1345,8 +1309,8 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, | |||
| 1345 | snapc, do_sync, | 1309 | snapc, do_sync, |
| 1346 | truncate_seq, truncate_size, mtime, | 1310 | truncate_seq, truncate_size, mtime, |
| 1347 | nofail, 1); | 1311 | nofail, 1); |
| 1348 | if (IS_ERR(req)) | 1312 | if (!req) |
| 1349 | return PTR_ERR(req); | 1313 | return -ENOMEM; |
| 1350 | 1314 | ||
| 1351 | /* it may be a short write due to an object boundary */ | 1315 | /* it may be a short write due to an object boundary */ |
| 1352 | req->r_pages = pages; | 1316 | req->r_pages = pages; |
| @@ -1394,7 +1358,8 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg) | |||
| 1394 | } | 1358 | } |
| 1395 | 1359 | ||
| 1396 | /* | 1360 | /* |
| 1397 | * lookup and return message for incoming reply | 1361 | * lookup and return message for incoming reply. set up reply message |
| 1362 | * pages. | ||
| 1398 | */ | 1363 | */ |
| 1399 | static struct ceph_msg *get_reply(struct ceph_connection *con, | 1364 | static struct ceph_msg *get_reply(struct ceph_connection *con, |
| 1400 | struct ceph_msg_header *hdr, | 1365 | struct ceph_msg_header *hdr, |
| @@ -1407,7 +1372,6 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, | |||
| 1407 | int front = le32_to_cpu(hdr->front_len); | 1372 | int front = le32_to_cpu(hdr->front_len); |
| 1408 | int data_len = le32_to_cpu(hdr->data_len); | 1373 | int data_len = le32_to_cpu(hdr->data_len); |
| 1409 | u64 tid; | 1374 | u64 tid; |
| 1410 | int err; | ||
| 1411 | 1375 | ||
| 1412 | tid = le64_to_cpu(hdr->tid); | 1376 | tid = le64_to_cpu(hdr->tid); |
| 1413 | mutex_lock(&osdc->request_mutex); | 1377 | mutex_lock(&osdc->request_mutex); |
| @@ -1425,13 +1389,14 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, | |||
| 1425 | req->r_reply, req->r_con_filling_msg); | 1389 | req->r_reply, req->r_con_filling_msg); |
| 1426 | ceph_con_revoke_message(req->r_con_filling_msg, req->r_reply); | 1390 | ceph_con_revoke_message(req->r_con_filling_msg, req->r_reply); |
| 1427 | ceph_con_put(req->r_con_filling_msg); | 1391 | ceph_con_put(req->r_con_filling_msg); |
| 1392 | req->r_con_filling_msg = NULL; | ||
| 1428 | } | 1393 | } |
| 1429 | 1394 | ||
| 1430 | if (front > req->r_reply->front.iov_len) { | 1395 | if (front > req->r_reply->front.iov_len) { |
| 1431 | pr_warning("get_reply front %d > preallocated %d\n", | 1396 | pr_warning("get_reply front %d > preallocated %d\n", |
| 1432 | front, (int)req->r_reply->front.iov_len); | 1397 | front, (int)req->r_reply->front.iov_len); |
| 1433 | m = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, front, 0, 0, NULL); | 1398 | m = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, front, GFP_NOFS); |
| 1434 | if (IS_ERR(m)) | 1399 | if (!m) |
| 1435 | goto out; | 1400 | goto out; |
| 1436 | ceph_msg_put(req->r_reply); | 1401 | ceph_msg_put(req->r_reply); |
| 1437 | req->r_reply = m; | 1402 | req->r_reply = m; |
| @@ -1439,12 +1404,19 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, | |||
| 1439 | m = ceph_msg_get(req->r_reply); | 1404 | m = ceph_msg_get(req->r_reply); |
| 1440 | 1405 | ||
| 1441 | if (data_len > 0) { | 1406 | if (data_len > 0) { |
| 1442 | err = __prepare_pages(con, hdr, req, tid, m); | 1407 | unsigned data_off = le16_to_cpu(hdr->data_off); |
| 1443 | if (err < 0) { | 1408 | int want = calc_pages_for(data_off & ~PAGE_MASK, data_len); |
| 1409 | |||
| 1410 | if (unlikely(req->r_num_pages < want)) { | ||
| 1411 | pr_warning("tid %lld reply %d > expected %d pages\n", | ||
| 1412 | tid, want, m->nr_pages); | ||
| 1444 | *skip = 1; | 1413 | *skip = 1; |
| 1445 | ceph_msg_put(m); | 1414 | ceph_msg_put(m); |
| 1446 | m = ERR_PTR(err); | 1415 | m = NULL; |
| 1416 | goto out; | ||
| 1447 | } | 1417 | } |
| 1418 | m->pages = req->r_pages; | ||
| 1419 | m->nr_pages = req->r_num_pages; | ||
| 1448 | } | 1420 | } |
| 1449 | *skip = 0; | 1421 | *skip = 0; |
| 1450 | req->r_con_filling_msg = ceph_con_get(con); | 1422 | req->r_con_filling_msg = ceph_con_get(con); |
| @@ -1466,7 +1438,7 @@ static struct ceph_msg *alloc_msg(struct ceph_connection *con, | |||
| 1466 | 1438 | ||
| 1467 | switch (type) { | 1439 | switch (type) { |
| 1468 | case CEPH_MSG_OSD_MAP: | 1440 | case CEPH_MSG_OSD_MAP: |
| 1469 | return ceph_msg_new(type, front, 0, 0, NULL); | 1441 | return ceph_msg_new(type, front, GFP_NOFS); |
| 1470 | case CEPH_MSG_OSD_OPREPLY: | 1442 | case CEPH_MSG_OSD_OPREPLY: |
| 1471 | return get_reply(con, hdr, skip); | 1443 | return get_reply(con, hdr, skip); |
| 1472 | default: | 1444 | default: |
| @@ -1552,7 +1524,7 @@ static int invalidate_authorizer(struct ceph_connection *con) | |||
| 1552 | return ceph_monc_validate_auth(&osdc->client->monc); | 1524 | return ceph_monc_validate_auth(&osdc->client->monc); |
| 1553 | } | 1525 | } |
| 1554 | 1526 | ||
| 1555 | const static struct ceph_connection_operations osd_con_ops = { | 1527 | static const struct ceph_connection_operations osd_con_ops = { |
| 1556 | .get = get_osd_con, | 1528 | .get = get_osd_con, |
| 1557 | .put = put_osd_con, | 1529 | .put = put_osd_con, |
| 1558 | .dispatch = dispatch, | 1530 | .dispatch = dispatch, |
diff --git a/fs/ceph/pagelist.c b/fs/ceph/pagelist.c index 5f8dbf7c745a..b6859f47d364 100644 --- a/fs/ceph/pagelist.c +++ b/fs/ceph/pagelist.c | |||
| @@ -20,7 +20,7 @@ int ceph_pagelist_release(struct ceph_pagelist *pl) | |||
| 20 | 20 | ||
| 21 | static int ceph_pagelist_addpage(struct ceph_pagelist *pl) | 21 | static int ceph_pagelist_addpage(struct ceph_pagelist *pl) |
| 22 | { | 22 | { |
| 23 | struct page *page = alloc_page(GFP_NOFS); | 23 | struct page *page = __page_cache_alloc(GFP_NOFS); |
| 24 | if (!page) | 24 | if (!page) |
| 25 | return -ENOMEM; | 25 | return -ENOMEM; |
| 26 | pl->room += PAGE_SIZE; | 26 | pl->room += PAGE_SIZE; |
diff --git a/fs/ceph/rados.h b/fs/ceph/rados.h index fd56451a871f..8fcc023056c7 100644 --- a/fs/ceph/rados.h +++ b/fs/ceph/rados.h | |||
| @@ -101,8 +101,8 @@ struct ceph_pg_pool { | |||
| 101 | __le64 snap_seq; /* seq for per-pool snapshot */ | 101 | __le64 snap_seq; /* seq for per-pool snapshot */ |
| 102 | __le32 snap_epoch; /* epoch of last snap */ | 102 | __le32 snap_epoch; /* epoch of last snap */ |
| 103 | __le32 num_snaps; | 103 | __le32 num_snaps; |
| 104 | __le32 num_removed_snap_intervals; | 104 | __le32 num_removed_snap_intervals; /* if non-empty, NO per-pool snaps */ |
| 105 | __le64 uid; | 105 | __le64 auid; /* who owns the pg */ |
| 106 | } __attribute__ ((packed)); | 106 | } __attribute__ ((packed)); |
| 107 | 107 | ||
| 108 | /* | 108 | /* |
| @@ -208,6 +208,7 @@ enum { | |||
| 208 | /* read */ | 208 | /* read */ |
| 209 | CEPH_OSD_OP_GETXATTR = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 1, | 209 | CEPH_OSD_OP_GETXATTR = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 1, |
| 210 | CEPH_OSD_OP_GETXATTRS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 2, | 210 | CEPH_OSD_OP_GETXATTRS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 2, |
| 211 | CEPH_OSD_OP_CMPXATTR = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 3, | ||
| 211 | 212 | ||
| 212 | /* write */ | 213 | /* write */ |
| 213 | CEPH_OSD_OP_SETXATTR = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_ATTR | 1, | 214 | CEPH_OSD_OP_SETXATTR = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_ATTR | 1, |
| @@ -305,6 +306,22 @@ enum { | |||
| 305 | #define EOLDSNAPC ERESTART /* ORDERSNAP flag set; writer has old snapc*/ | 306 | #define EOLDSNAPC ERESTART /* ORDERSNAP flag set; writer has old snapc*/ |
| 306 | #define EBLACKLISTED ESHUTDOWN /* blacklisted */ | 307 | #define EBLACKLISTED ESHUTDOWN /* blacklisted */ |
| 307 | 308 | ||
| 309 | /* xattr comparison */ | ||
| 310 | enum { | ||
| 311 | CEPH_OSD_CMPXATTR_OP_NOP = 0, | ||
| 312 | CEPH_OSD_CMPXATTR_OP_EQ = 1, | ||
| 313 | CEPH_OSD_CMPXATTR_OP_NE = 2, | ||
| 314 | CEPH_OSD_CMPXATTR_OP_GT = 3, | ||
| 315 | CEPH_OSD_CMPXATTR_OP_GTE = 4, | ||
| 316 | CEPH_OSD_CMPXATTR_OP_LT = 5, | ||
| 317 | CEPH_OSD_CMPXATTR_OP_LTE = 6 | ||
| 318 | }; | ||
| 319 | |||
| 320 | enum { | ||
| 321 | CEPH_OSD_CMPXATTR_MODE_STRING = 1, | ||
| 322 | CEPH_OSD_CMPXATTR_MODE_U64 = 2 | ||
| 323 | }; | ||
| 324 | |||
| 308 | /* | 325 | /* |
| 309 | * an individual object operation. each may be accompanied by some data | 326 | * an individual object operation. each may be accompanied by some data |
| 310 | * payload | 327 | * payload |
| @@ -321,6 +338,8 @@ struct ceph_osd_op { | |||
| 321 | struct { | 338 | struct { |
| 322 | __le32 name_len; | 339 | __le32 name_len; |
| 323 | __le32 value_len; | 340 | __le32 value_len; |
| 341 | __u8 cmp_op; /* CEPH_OSD_CMPXATTR_OP_* */ | ||
| 342 | __u8 cmp_mode; /* CEPH_OSD_CMPXATTR_MODE_* */ | ||
| 324 | } __attribute__ ((packed)) xattr; | 343 | } __attribute__ ((packed)) xattr; |
| 325 | struct { | 344 | struct { |
| 326 | __u8 class_len; | 345 | __u8 class_len; |
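The new cmp_op/cmp_mode fields make CEPH_OSD_OP_CMPXATTR self-describing; a hedged sketch of encoding one such op (helper hypothetical; assumes the usual __le16 op discriminator at the top of struct ceph_osd_op, with the xattr name and value carried in the op's data payload):

	static void example_encode_cmpxattr(struct ceph_osd_op *op,
					    const char *name, const char *val)
	{
		op->op = cpu_to_le16(CEPH_OSD_OP_CMPXATTR);
		op->xattr.name_len = cpu_to_le32(strlen(name));
		op->xattr.value_len = cpu_to_le32(strlen(val));
		op->xattr.cmp_op = CEPH_OSD_CMPXATTR_OP_EQ;		/* require equality */
		op->xattr.cmp_mode = CEPH_OSD_CMPXATTR_MODE_STRING;
	}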
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index d5114db70453..c0b26b6badba 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c | |||
| @@ -512,7 +512,7 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci, | |||
| 512 | struct ceph_cap_snap *capsnap) | 512 | struct ceph_cap_snap *capsnap) |
| 513 | { | 513 | { |
| 514 | struct inode *inode = &ci->vfs_inode; | 514 | struct inode *inode = &ci->vfs_inode; |
| 515 | struct ceph_mds_client *mdsc = &ceph_client(inode->i_sb)->mdsc; | 515 | struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc; |
| 516 | 516 | ||
| 517 | BUG_ON(capsnap->writing); | 517 | BUG_ON(capsnap->writing); |
| 518 | capsnap->size = inode->i_size; | 518 | capsnap->size = inode->i_size; |
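ceph_client() becomes ceph_sb_to_client() here and throughout the rest of this series; the old inline is deleted from super.h further down. The replacement is presumably the same trivial accessor under a name that no longer collides with the struct ceph_client type:

    /* assumed definition of the renamed helper */
    static inline struct ceph_client *ceph_sb_to_client(struct super_block *sb)
    {
            return sb->s_fs_info;
    }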
diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 9307bbee6fbe..7c663d9b9f81 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c | |||
| @@ -8,14 +8,11 @@ | |||
| 8 | #include <linux/module.h> | 8 | #include <linux/module.h> |
| 9 | #include <linux/mount.h> | 9 | #include <linux/mount.h> |
| 10 | #include <linux/parser.h> | 10 | #include <linux/parser.h> |
| 11 | #include <linux/rwsem.h> | ||
| 12 | #include <linux/sched.h> | 11 | #include <linux/sched.h> |
| 13 | #include <linux/seq_file.h> | 12 | #include <linux/seq_file.h> |
| 14 | #include <linux/slab.h> | 13 | #include <linux/slab.h> |
| 15 | #include <linux/statfs.h> | 14 | #include <linux/statfs.h> |
| 16 | #include <linux/string.h> | 15 | #include <linux/string.h> |
| 17 | #include <linux/version.h> | ||
| 18 | #include <linux/vmalloc.h> | ||
| 19 | 16 | ||
| 20 | #include "decode.h" | 17 | #include "decode.h" |
| 21 | #include "super.h" | 18 | #include "super.h" |
| @@ -107,12 +104,40 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
| 107 | static int ceph_syncfs(struct super_block *sb, int wait) | 104 | static int ceph_syncfs(struct super_block *sb, int wait) |
| 108 | { | 105 | { |
| 109 | dout("sync_fs %d\n", wait); | 106 | dout("sync_fs %d\n", wait); |
| 110 | ceph_osdc_sync(&ceph_client(sb)->osdc); | 107 | ceph_osdc_sync(&ceph_sb_to_client(sb)->osdc); |
| 111 | ceph_mdsc_sync(&ceph_client(sb)->mdsc); | 108 | ceph_mdsc_sync(&ceph_sb_to_client(sb)->mdsc); |
| 112 | dout("sync_fs %d done\n", wait); | 109 | dout("sync_fs %d done\n", wait); |
| 113 | return 0; | 110 | return 0; |
| 114 | } | 111 | } |
| 115 | 112 | ||
| 113 | static int default_congestion_kb(void) | ||
| 114 | { | ||
| 115 | int congestion_kb; | ||
| 116 | |||
| 117 | /* | ||
| 118 | * Copied from NFS | ||
| 119 | * | ||
| 120 | * congestion size, scale with available memory. | ||
| 121 | * | ||
| 122 | * 64MB: 8192k | ||
| 123 | * 128MB: 11585k | ||
| 124 | * 256MB: 16384k | ||
| 125 | * 512MB: 23170k | ||
| 126 | * 1GB: 32768k | ||
| 127 | * 2GB: 46340k | ||
| 128 | * 4GB: 65536k | ||
| 129 | * 8GB: 92681k | ||
| 130 | * 16GB: 131072k | ||
| 131 | * | ||
| 132 | * This allows larger machines to have larger/more transfers. | ||
| 133 | * Limit the default to 256M | ||
| 134 | */ | ||
| 135 | congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10); | ||
| 136 | if (congestion_kb > 256*1024) | ||
| 137 | congestion_kb = 256*1024; | ||
| 138 | |||
| 139 | return congestion_kb; | ||
| 140 | } | ||
| 116 | 141 | ||
| 117 | /** | 142 | /** |
| 118 | * ceph_show_options - Show mount options in /proc/mounts | 143 | * ceph_show_options - Show mount options in /proc/mounts |
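Sanity-checking one row of the table in default_congestion_kb() above (assuming PAGE_SHIFT = 12, i.e. 4 KB pages):

    totalram_pages (1 GB)     = 262144 pages
    int_sqrt(262144)          = 512
    16 * 512                  = 8192
    8192 << (PAGE_SHIFT - 10) = 8192 << 2 = 32768 KB    /* matches "1GB: 32768k" */

The 256*1024 KB clamp is first reached at 64 GB of RAM (int_sqrt(16777216) = 4096; 16 * 4096 << 2 = 262144), so anything smaller gets the square-root scaling.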
| @@ -138,6 +163,35 @@ static int ceph_show_options(struct seq_file *m, struct vfsmount *mnt) | |||
| 138 | seq_puts(m, ",nocrc"); | 163 | seq_puts(m, ",nocrc"); |
| 139 | if (args->flags & CEPH_OPT_NOASYNCREADDIR) | 164 | if (args->flags & CEPH_OPT_NOASYNCREADDIR) |
| 140 | seq_puts(m, ",noasyncreaddir"); | 165 | seq_puts(m, ",noasyncreaddir"); |
| 166 | |||
| 167 | if (args->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT) | ||
| 168 | seq_printf(m, ",mount_timeout=%d", args->mount_timeout); | ||
| 169 | if (args->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT) | ||
| 170 | seq_printf(m, ",osd_idle_ttl=%d", args->osd_idle_ttl); | ||
| 171 | if (args->osd_timeout != CEPH_OSD_TIMEOUT_DEFAULT) | ||
| 172 | seq_printf(m, ",osdtimeout=%d", args->osd_timeout); | ||
| 173 | if (args->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT) | ||
| 174 | seq_printf(m, ",osdkeepalivetimeout=%d", | ||
| 175 | args->osd_keepalive_timeout); | ||
| 176 | if (args->wsize) | ||
| 177 | seq_printf(m, ",wsize=%d", args->wsize); | ||
| 178 | if (args->rsize != CEPH_MOUNT_RSIZE_DEFAULT) | ||
| 179 | seq_printf(m, ",rsize=%d", args->rsize); | ||
| 180 | if (args->congestion_kb != default_congestion_kb()) | ||
| 181 | seq_printf(m, ",write_congestion_kb=%d", args->congestion_kb); | ||
| 182 | if (args->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT) | ||
| 183 | seq_printf(m, ",caps_wanted_delay_min=%d", | ||
| 184 | args->caps_wanted_delay_min); | ||
| 185 | if (args->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT) | ||
| 186 | seq_printf(m, ",caps_wanted_delay_max=%d", | ||
| 187 | args->caps_wanted_delay_max); | ||
| 188 | if (args->cap_release_safety != CEPH_CAP_RELEASE_SAFETY_DEFAULT) | ||
| 189 | seq_printf(m, ",cap_release_safety=%d", | ||
| 190 | args->cap_release_safety); | ||
| 191 | if (args->max_readdir != CEPH_MAX_READDIR_DEFAULT) | ||
| 192 | seq_printf(m, ",readdir_max_entries=%d", args->max_readdir); | ||
| 193 | if (args->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT) | ||
| 194 | seq_printf(m, ",readdir_max_bytes=%d", args->max_readdir_bytes); | ||
| 141 | if (strcmp(args->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT)) | 195 | if (strcmp(args->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT)) |
| 142 | seq_printf(m, ",snapdirname=%s", args->snapdir_name); | 196 | seq_printf(m, ",snapdirname=%s", args->snapdir_name); |
| 143 | if (args->name) | 197 | if (args->name) |
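With these seq_printf() calls, only options that differ from their defaults appear in /proc/mounts. A hypothetical entry for a mount overriding two of the integer options might read (illustrative values and device string):

    1.2.3.4:6789:/ /mnt/ceph ceph rw,osdtimeout=120,readdir_max_entries=4096,name=admin 0 0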
| @@ -161,35 +215,6 @@ static void ceph_inode_init_once(void *foo) | |||
| 161 | inode_init_once(&ci->vfs_inode); | 215 | inode_init_once(&ci->vfs_inode); |
| 162 | } | 216 | } |
| 163 | 217 | ||
| 164 | static int default_congestion_kb(void) | ||
| 165 | { | ||
| 166 | int congestion_kb; | ||
| 167 | |||
| 168 | /* | ||
| 169 | * Copied from NFS | ||
| 170 | * | ||
| 171 | * congestion size, scale with available memory. | ||
| 172 | * | ||
| 173 | * 64MB: 8192k | ||
| 174 | * 128MB: 11585k | ||
| 175 | * 256MB: 16384k | ||
| 176 | * 512MB: 23170k | ||
| 177 | * 1GB: 32768k | ||
| 178 | * 2GB: 46340k | ||
| 179 | * 4GB: 65536k | ||
| 180 | * 8GB: 92681k | ||
| 181 | * 16GB: 131072k | ||
| 182 | * | ||
| 183 | * This allows larger machines to have larger/more transfers. | ||
| 184 | * Limit the default to 256M | ||
| 185 | */ | ||
| 186 | congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10); | ||
| 187 | if (congestion_kb > 256*1024) | ||
| 188 | congestion_kb = 256*1024; | ||
| 189 | |||
| 190 | return congestion_kb; | ||
| 191 | } | ||
| 192 | |||
| 193 | static int __init init_caches(void) | 218 | static int __init init_caches(void) |
| 194 | { | 219 | { |
| 195 | ceph_inode_cachep = kmem_cache_create("ceph_inode_info", | 220 | ceph_inode_cachep = kmem_cache_create("ceph_inode_info", |
| @@ -308,7 +333,9 @@ enum { | |||
| 308 | Opt_osd_idle_ttl, | 333 | Opt_osd_idle_ttl, |
| 309 | Opt_caps_wanted_delay_min, | 334 | Opt_caps_wanted_delay_min, |
| 310 | Opt_caps_wanted_delay_max, | 335 | Opt_caps_wanted_delay_max, |
| 336 | Opt_cap_release_safety, | ||
| 311 | Opt_readdir_max_entries, | 337 | Opt_readdir_max_entries, |
| 338 | Opt_readdir_max_bytes, | ||
| 312 | Opt_congestion_kb, | 339 | Opt_congestion_kb, |
| 313 | Opt_last_int, | 340 | Opt_last_int, |
| 314 | /* int args above */ | 341 | /* int args above */ |
| @@ -339,7 +366,9 @@ static match_table_t arg_tokens = { | |||
| 339 | {Opt_osd_idle_ttl, "osd_idle_ttl=%d"}, | 366 | {Opt_osd_idle_ttl, "osd_idle_ttl=%d"}, |
| 340 | {Opt_caps_wanted_delay_min, "caps_wanted_delay_min=%d"}, | 367 | {Opt_caps_wanted_delay_min, "caps_wanted_delay_min=%d"}, |
| 341 | {Opt_caps_wanted_delay_max, "caps_wanted_delay_max=%d"}, | 368 | {Opt_caps_wanted_delay_max, "caps_wanted_delay_max=%d"}, |
| 369 | {Opt_cap_release_safety, "cap_release_safety=%d"}, | ||
| 342 | {Opt_readdir_max_entries, "readdir_max_entries=%d"}, | 370 | {Opt_readdir_max_entries, "readdir_max_entries=%d"}, |
| 371 | {Opt_readdir_max_bytes, "readdir_max_bytes=%d"}, | ||
| 343 | {Opt_congestion_kb, "write_congestion_kb=%d"}, | 372 | {Opt_congestion_kb, "write_congestion_kb=%d"}, |
| 344 | /* int args above */ | 373 | /* int args above */ |
| 345 | {Opt_snapdirname, "snapdirname=%s"}, | 374 | {Opt_snapdirname, "snapdirname=%s"}, |
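The new tokens slot into the standard match_token() idiom used by parse_mount_args(): any token ordered before Opt_last_int is parsed as an integer before the per-option switch runs, which is why the two new entries sit above the "int args above" marker. In sketch form (control flow assumed from the surrounding code):

    token = match_token(c, arg_tokens, argstr);
    if (token < 0)
            continue;                       /* unrecognized option */
    if (token < Opt_last_int) {
            err = match_int(&argstr[0], &intval);
            if (err < 0)
                    continue;               /* malformed integer */
    }
    switch (token) {
    case Opt_readdir_max_bytes:
            args->max_readdir_bytes = intval;
            break;
    /* ... */
    }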
| @@ -388,8 +417,9 @@ static struct ceph_mount_args *parse_mount_args(int flags, char *options, | |||
| 388 | args->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT; | 417 | args->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT; |
| 389 | args->rsize = CEPH_MOUNT_RSIZE_DEFAULT; | 418 | args->rsize = CEPH_MOUNT_RSIZE_DEFAULT; |
| 390 | args->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL); | 419 | args->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL); |
| 391 | args->cap_release_safety = CEPH_CAPS_PER_RELEASE * 4; | 420 | args->cap_release_safety = CEPH_CAP_RELEASE_SAFETY_DEFAULT; |
| 392 | args->max_readdir = 1024; | 421 | args->max_readdir = CEPH_MAX_READDIR_DEFAULT; |
| 422 | args->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT; | ||
| 393 | args->congestion_kb = default_congestion_kb(); | 423 | args->congestion_kb = default_congestion_kb(); |
| 394 | 424 | ||
| 395 | /* ip1[:port1][,ip2[:port2]...]:/subdir/in/fs */ | 425 | /* ip1[:port1][,ip2[:port2]...]:/subdir/in/fs */ |
| @@ -497,6 +527,9 @@ static struct ceph_mount_args *parse_mount_args(int flags, char *options, | |||
| 497 | case Opt_readdir_max_entries: | 527 | case Opt_readdir_max_entries: |
| 498 | args->max_readdir = intval; | 528 | args->max_readdir = intval; |
| 499 | break; | 529 | break; |
| 530 | case Opt_readdir_max_bytes: | ||
| 531 | args->max_readdir_bytes = intval; | ||
| 532 | break; | ||
| 500 | case Opt_congestion_kb: | 533 | case Opt_congestion_kb: |
| 501 | args->congestion_kb = intval; | 534 | args->congestion_kb = intval; |
| 502 | break; | 535 | break; |
| @@ -682,9 +715,10 @@ int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid) | |||
| 682 | /* | 715 | /* |
| 683 | * true if we have the mon map (and have thus joined the cluster) | 716 | * true if we have the mon map (and have thus joined the cluster) |
| 684 | */ | 717 | */ |
| 685 | static int have_mon_map(struct ceph_client *client) | 718 | static int have_mon_and_osd_map(struct ceph_client *client) |
| 686 | { | 719 | { |
| 687 | return client->monc.monmap && client->monc.monmap->epoch; | 720 | return client->monc.monmap && client->monc.monmap->epoch && |
| 721 | client->osdc.osdmap && client->osdc.osdmap->epoch; | ||
| 688 | } | 722 | } |
| 689 | 723 | ||
| 690 | /* | 724 | /* |
| @@ -762,7 +796,7 @@ static int ceph_mount(struct ceph_client *client, struct vfsmount *mnt, | |||
| 762 | if (err < 0) | 796 | if (err < 0) |
| 763 | goto out; | 797 | goto out; |
| 764 | 798 | ||
| 765 | while (!have_mon_map(client)) { | 799 | while (!have_mon_and_osd_map(client)) { |
| 766 | err = -EIO; | 800 | err = -EIO; |
| 767 | if (timeout && time_after_eq(jiffies, started + timeout)) | 801 | if (timeout && time_after_eq(jiffies, started + timeout)) |
| 768 | goto out; | 802 | goto out; |
| @@ -770,8 +804,8 @@ static int ceph_mount(struct ceph_client *client, struct vfsmount *mnt, | |||
| 770 | /* wait */ | 804 | /* wait */ |
| 771 | dout("mount waiting for mon_map\n"); | 805 | dout("mount waiting for mon_map\n"); |
| 772 | err = wait_event_interruptible_timeout(client->auth_wq, | 806 | err = wait_event_interruptible_timeout(client->auth_wq, |
| 773 | have_mon_map(client) || (client->auth_err < 0), | 807 | have_mon_and_osd_map(client) || (client->auth_err < 0), |
| 774 | timeout); | 808 | timeout); |
| 775 | if (err == -EINTR || err == -ERESTARTSYS) | 809 | if (err == -EINTR || err == -ERESTARTSYS) |
| 776 | goto out; | 810 | goto out; |
| 777 | if (client->auth_err < 0) { | 811 | if (client->auth_err < 0) { |
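wait_event_interruptible_timeout() returns a negative errno if a signal arrived, 0 on timeout, and the remaining jiffies once the condition holds; the timeout case deliberately falls through, and the time_after_eq() test at the top of the loop converts it into -EIO on the next pass. Requiring the osdmap as well as the monmap before the mount proceeds means the first I/O can no longer race ahead of the initial osdmap delivery.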
| @@ -884,6 +918,8 @@ static int ceph_compare_super(struct super_block *sb, void *data) | |||
| 884 | /* | 918 | /* |
| 885 | * construct our own bdi so we can control readahead, etc. | 919 | * construct our own bdi so we can control readahead, etc. |
| 886 | */ | 920 | */ |
| 921 | static atomic_long_t bdi_seq = ATOMIC_INIT(0); | ||
| 922 | |||
| 887 | static int ceph_register_bdi(struct super_block *sb, struct ceph_client *client) | 923 | static int ceph_register_bdi(struct super_block *sb, struct ceph_client *client) |
| 888 | { | 924 | { |
| 889 | int err; | 925 | int err; |
| @@ -893,7 +929,8 @@ static int ceph_register_bdi(struct super_block *sb, struct ceph_client *client) | |||
| 893 | client->backing_dev_info.ra_pages = | 929 | client->backing_dev_info.ra_pages = |
| 894 | (client->mount_args->rsize + PAGE_CACHE_SIZE - 1) | 930 | (client->mount_args->rsize + PAGE_CACHE_SIZE - 1) |
| 895 | >> PAGE_SHIFT; | 931 | >> PAGE_SHIFT; |
| 896 | err = bdi_register_dev(&client->backing_dev_info, sb->s_dev); | 932 | err = bdi_register(&client->backing_dev_info, NULL, "ceph-%d", |
| 933 | atomic_long_inc_return(&bdi_seq)); | ||
| 897 | if (!err) | 934 | if (!err) |
| 898 | sb->s_bdi = &client->backing_dev_info; | 935 | sb->s_bdi = &client->backing_dev_info; |
| 899 | return err; | 936 | return err; |
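bdi_register_dev() derives the bdi's sysfs name from sb->s_dev, which for a ceph superblock is an anonymous device number; registering via bdi_register() with the private, monotonically increasing bdi_seq instead gives every client a stable, human-readable "ceph-N" identity regardless of how anonymous devs get recycled. That is the apparent motivation; the hunk itself does not state one.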
| @@ -932,9 +969,9 @@ static int ceph_get_sb(struct file_system_type *fs_type, | |||
| 932 | goto out; | 969 | goto out; |
| 933 | } | 970 | } |
| 934 | 971 | ||
| 935 | if (ceph_client(sb) != client) { | 972 | if (ceph_sb_to_client(sb) != client) { |
| 936 | ceph_destroy_client(client); | 973 | ceph_destroy_client(client); |
| 937 | client = ceph_client(sb); | 974 | client = ceph_sb_to_client(sb); |
| 938 | dout("get_sb got existing client %p\n", client); | 975 | dout("get_sb got existing client %p\n", client); |
| 939 | } else { | 976 | } else { |
| 940 | dout("get_sb using new client %p\n", client); | 977 | dout("get_sb using new client %p\n", client); |
diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 13513b80d87f..3725c9ee9d08 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h | |||
| @@ -52,24 +52,25 @@ | |||
| 52 | 52 | ||
| 53 | struct ceph_mount_args { | 53 | struct ceph_mount_args { |
| 54 | int sb_flags; | 54 | int sb_flags; |
| 55 | int flags; | ||
| 56 | struct ceph_fsid fsid; | ||
| 57 | struct ceph_entity_addr my_addr; | ||
| 55 | int num_mon; | 58 | int num_mon; |
| 56 | struct ceph_entity_addr *mon_addr; | 59 | struct ceph_entity_addr *mon_addr; |
| 57 | int flags; | ||
| 58 | int mount_timeout; | 60 | int mount_timeout; |
| 59 | int osd_idle_ttl; | 61 | int osd_idle_ttl; |
| 60 | int caps_wanted_delay_min, caps_wanted_delay_max; | ||
| 61 | struct ceph_fsid fsid; | ||
| 62 | struct ceph_entity_addr my_addr; | ||
| 63 | int wsize; | ||
| 64 | int rsize; /* max readahead */ | ||
| 65 | int max_readdir; /* max readdir size */ | ||
| 66 | int congestion_kb; /* max readdir size */ | ||
| 67 | int osd_timeout; | 62 | int osd_timeout; |
| 68 | int osd_keepalive_timeout; | 63 | int osd_keepalive_timeout; |
| 64 | int wsize; | ||
| 65 | int rsize; /* max readahead */ | ||
| 66 | int congestion_kb; /* max writeback in flight */ | ||
| 67 | int caps_wanted_delay_min, caps_wanted_delay_max; | ||
| 68 | int cap_release_safety; | ||
| 69 | int max_readdir; /* max readdir result (entries) */ | ||
| 70 | int max_readdir_bytes; /* max readdir result (bytes) */ | ||
| 69 | char *snapdir_name; /* default ".snap" */ | 71 | char *snapdir_name; /* default ".snap" */ |
| 70 | char *name; | 72 | char *name; |
| 71 | char *secret; | 73 | char *secret; |
| 72 | int cap_release_safety; | ||
| 73 | }; | 74 | }; |
| 74 | 75 | ||
| 75 | /* | 76 | /* |
| @@ -80,13 +81,14 @@ struct ceph_mount_args { | |||
| 80 | #define CEPH_OSD_KEEPALIVE_DEFAULT 5 | 81 | #define CEPH_OSD_KEEPALIVE_DEFAULT 5 |
| 81 | #define CEPH_OSD_IDLE_TTL_DEFAULT 60 | 82 | #define CEPH_OSD_IDLE_TTL_DEFAULT 60 |
| 82 | #define CEPH_MOUNT_RSIZE_DEFAULT (512*1024) /* readahead */ | 83 | #define CEPH_MOUNT_RSIZE_DEFAULT (512*1024) /* readahead */ |
| 84 | #define CEPH_MAX_READDIR_DEFAULT 1024 | ||
| 85 | #define CEPH_MAX_READDIR_BYTES_DEFAULT (512*1024) | ||
| 83 | 86 | ||
| 84 | #define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024) | 87 | #define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024) |
| 85 | #define CEPH_MSG_MAX_DATA_LEN (16*1024*1024) | 88 | #define CEPH_MSG_MAX_DATA_LEN (16*1024*1024) |
| 86 | 89 | ||
| 87 | #define CEPH_SNAPDIRNAME_DEFAULT ".snap" | 90 | #define CEPH_SNAPDIRNAME_DEFAULT ".snap" |
| 88 | #define CEPH_AUTH_NAME_DEFAULT "guest" | 91 | #define CEPH_AUTH_NAME_DEFAULT "guest" |
| 89 | |||
| 90 | /* | 92 | /* |
| 91 | * Delay telling the MDS we no longer want caps, in case we reopen | 93 | * Delay telling the MDS we no longer want caps, in case we reopen |
| 92 | * the file. Delay a minimum amount of time, even if we send a cap | 94 | * the file. Delay a minimum amount of time, even if we send a cap |
| @@ -96,6 +98,7 @@ struct ceph_mount_args { | |||
| 96 | #define CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT 5 /* cap release delay */ | 98 | #define CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT 5 /* cap release delay */ |
| 97 | #define CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT 60 /* cap release delay */ | 99 | #define CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT 60 /* cap release delay */ |
| 98 | 100 | ||
| 101 | #define CEPH_CAP_RELEASE_SAFETY_DEFAULT (CEPH_CAPS_PER_RELEASE * 4) | ||
| 99 | 102 | ||
| 100 | /* mount state */ | 103 | /* mount state */ |
| 101 | enum { | 104 | enum { |
| @@ -160,12 +163,6 @@ struct ceph_client { | |||
| 160 | #endif | 163 | #endif |
| 161 | }; | 164 | }; |
| 162 | 165 | ||
| 163 | static inline struct ceph_client *ceph_client(struct super_block *sb) | ||
| 164 | { | ||
| 165 | return sb->s_fs_info; | ||
| 166 | } | ||
| 167 | |||
| 168 | |||
| 169 | /* | 166 | /* |
| 170 | * File i/o capability. This tracks shared state with the metadata | 167 | * File i/o capability. This tracks shared state with the metadata |
| 171 | * server that allows us to cache or writeback attributes or to read | 168 | * server that allows us to cache or writeback attributes or to read |
| @@ -871,6 +868,7 @@ extern struct dentry *ceph_finish_lookup(struct ceph_mds_request *req, | |||
| 871 | extern void ceph_dentry_lru_add(struct dentry *dn); | 868 | extern void ceph_dentry_lru_add(struct dentry *dn); |
| 872 | extern void ceph_dentry_lru_touch(struct dentry *dn); | 869 | extern void ceph_dentry_lru_touch(struct dentry *dn); |
| 873 | extern void ceph_dentry_lru_del(struct dentry *dn); | 870 | extern void ceph_dentry_lru_del(struct dentry *dn); |
| 871 | extern void ceph_invalidate_dentry_lease(struct dentry *dentry); | ||
| 874 | 872 | ||
| 875 | /* | 873 | /* |
| 876 | * our d_ops vary depending on whether the inode is live, | 874 | * our d_ops vary depending on whether the inode is live, |
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 2845422907fc..68aeebc69681 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c | |||
| @@ -7,7 +7,8 @@ | |||
| 7 | 7 | ||
| 8 | static bool ceph_is_valid_xattr(const char *name) | 8 | static bool ceph_is_valid_xattr(const char *name) |
| 9 | { | 9 | { |
| 10 | return !strncmp(name, XATTR_SECURITY_PREFIX, | 10 | return !strncmp(name, "ceph.", 5) || |
| 11 | !strncmp(name, XATTR_SECURITY_PREFIX, | ||
| 11 | XATTR_SECURITY_PREFIX_LEN) || | 12 | XATTR_SECURITY_PREFIX_LEN) || |
| 12 | !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) || | 13 | !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) || |
| 13 | !strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN); | 14 | !strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN); |
| @@ -76,14 +77,14 @@ static size_t ceph_vxattrcb_rctime(struct ceph_inode_info *ci, char *val, | |||
| 76 | } | 77 | } |
| 77 | 78 | ||
| 78 | static struct ceph_vxattr_cb ceph_dir_vxattrs[] = { | 79 | static struct ceph_vxattr_cb ceph_dir_vxattrs[] = { |
| 79 | { true, "user.ceph.dir.entries", ceph_vxattrcb_entries}, | 80 | { true, "ceph.dir.entries", ceph_vxattrcb_entries}, |
| 80 | { true, "user.ceph.dir.files", ceph_vxattrcb_files}, | 81 | { true, "ceph.dir.files", ceph_vxattrcb_files}, |
| 81 | { true, "user.ceph.dir.subdirs", ceph_vxattrcb_subdirs}, | 82 | { true, "ceph.dir.subdirs", ceph_vxattrcb_subdirs}, |
| 82 | { true, "user.ceph.dir.rentries", ceph_vxattrcb_rentries}, | 83 | { true, "ceph.dir.rentries", ceph_vxattrcb_rentries}, |
| 83 | { true, "user.ceph.dir.rfiles", ceph_vxattrcb_rfiles}, | 84 | { true, "ceph.dir.rfiles", ceph_vxattrcb_rfiles}, |
| 84 | { true, "user.ceph.dir.rsubdirs", ceph_vxattrcb_rsubdirs}, | 85 | { true, "ceph.dir.rsubdirs", ceph_vxattrcb_rsubdirs}, |
| 85 | { true, "user.ceph.dir.rbytes", ceph_vxattrcb_rbytes}, | 86 | { true, "ceph.dir.rbytes", ceph_vxattrcb_rbytes}, |
| 86 | { true, "user.ceph.dir.rctime", ceph_vxattrcb_rctime}, | 87 | { true, "ceph.dir.rctime", ceph_vxattrcb_rctime}, |
| 87 | { true, NULL, NULL } | 88 | { true, NULL, NULL } |
| 88 | }; | 89 | }; |
| 89 | 90 | ||
| @@ -107,7 +108,7 @@ static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val, | |||
| 107 | } | 108 | } |
| 108 | 109 | ||
| 109 | static struct ceph_vxattr_cb ceph_file_vxattrs[] = { | 110 | static struct ceph_vxattr_cb ceph_file_vxattrs[] = { |
| 110 | { true, "user.ceph.layout", ceph_vxattrcb_layout}, | 111 | { true, "ceph.layout", ceph_vxattrcb_layout}, |
| 111 | { NULL, NULL } | 112 | { NULL, NULL } |
| 112 | }; | 113 | }; |
| 113 | 114 | ||
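With the bogus "user." prefix dropped, the virtual xattrs live in their own "ceph." namespace, which the new strncmp in ceph_is_valid_xattr() above admits. From userspace they read back like any other xattr; a hypothetical probe (the path is an assumption):

    #include <sys/xattr.h>

    char buf[64];
    ssize_t n = getxattr("/mnt/ceph/some/dir", "ceph.dir.entries",
                         buf, sizeof(buf) - 1);
    if (n >= 0)
            buf[n] = '\0';   /* the callbacks above format the value as a decimal string */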
| @@ -186,12 +187,6 @@ static int __set_xattr(struct ceph_inode_info *ci, | |||
| 186 | ci->i_xattrs.names_size -= xattr->name_len; | 187 | ci->i_xattrs.names_size -= xattr->name_len; |
| 187 | ci->i_xattrs.vals_size -= xattr->val_len; | 188 | ci->i_xattrs.vals_size -= xattr->val_len; |
| 188 | } | 189 | } |
| 189 | if (!xattr) { | ||
| 190 | pr_err("__set_xattr ENOMEM on %p %llx.%llx xattr %s=%s\n", | ||
| 191 | &ci->vfs_inode, ceph_vinop(&ci->vfs_inode), name, | ||
| 192 | xattr->val); | ||
| 193 | return -ENOMEM; | ||
| 194 | } | ||
| 195 | ci->i_xattrs.names_size += name_len; | 190 | ci->i_xattrs.names_size += name_len; |
| 196 | ci->i_xattrs.vals_size += val_len; | 191 | ci->i_xattrs.vals_size += val_len; |
| 197 | if (val) | 192 | if (val) |
| @@ -574,7 +569,7 @@ ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size) | |||
| 574 | ci->i_xattrs.version, ci->i_xattrs.index_version); | 569 | ci->i_xattrs.version, ci->i_xattrs.index_version); |
| 575 | 570 | ||
| 576 | if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1) && | 571 | if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1) && |
| 577 | (ci->i_xattrs.index_version > ci->i_xattrs.version)) { | 572 | (ci->i_xattrs.index_version >= ci->i_xattrs.version)) { |
| 578 | goto list_xattr; | 573 | goto list_xattr; |
| 579 | } else { | 574 | } else { |
| 580 | spin_unlock(&inode->i_lock); | 575 | spin_unlock(&inode->i_lock); |
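The comparison loosens from > to >=: once an MDS reply has been parsed into the xattr index, index_version equals version, and that state is just as valid for listing as a strictly newer index; demanding strict inequality forced a needless getattr round trip in the common case.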
| @@ -622,7 +617,7 @@ out: | |||
| 622 | static int ceph_sync_setxattr(struct dentry *dentry, const char *name, | 617 | static int ceph_sync_setxattr(struct dentry *dentry, const char *name, |
| 623 | const char *value, size_t size, int flags) | 618 | const char *value, size_t size, int flags) |
| 624 | { | 619 | { |
| 625 | struct ceph_client *client = ceph_client(dentry->d_sb); | 620 | struct ceph_client *client = ceph_sb_to_client(dentry->d_sb); |
| 626 | struct inode *inode = dentry->d_inode; | 621 | struct inode *inode = dentry->d_inode; |
| 627 | struct ceph_inode_info *ci = ceph_inode(inode); | 622 | struct ceph_inode_info *ci = ceph_inode(inode); |
| 628 | struct inode *parent_inode = dentry->d_parent->d_inode; | 623 | struct inode *parent_inode = dentry->d_parent->d_inode; |
| @@ -641,7 +636,7 @@ static int ceph_sync_setxattr(struct dentry *dentry, const char *name, | |||
| 641 | return -ENOMEM; | 636 | return -ENOMEM; |
| 642 | err = -ENOMEM; | 637 | err = -ENOMEM; |
| 643 | for (i = 0; i < nr_pages; i++) { | 638 | for (i = 0; i < nr_pages; i++) { |
| 644 | pages[i] = alloc_page(GFP_NOFS); | 639 | pages[i] = __page_cache_alloc(GFP_NOFS); |
| 645 | if (!pages[i]) { | 640 | if (!pages[i]) { |
| 646 | nr_pages = i; | 641 | nr_pages = i; |
| 647 | goto out; | 642 | goto out; |
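The error path in the hunk above uses a small rollback idiom: when page i fails to allocate, nr_pages is clamped to i so that the out: label frees exactly the pages that were obtained. In isolation (cleanup label assumed):

    for (i = 0; i < nr_pages; i++) {
            pages[i] = __page_cache_alloc(GFP_NOFS);
            if (!pages[i]) {
                    nr_pages = i;   /* only pages[0..i-1] exist */
                    goto out;       /* out: frees the first nr_pages pages */
            }
    }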
| @@ -779,7 +774,7 @@ out: | |||
| 779 | 774 | ||
| 780 | static int ceph_send_removexattr(struct dentry *dentry, const char *name) | 775 | static int ceph_send_removexattr(struct dentry *dentry, const char *name) |
| 781 | { | 776 | { |
| 782 | struct ceph_client *client = ceph_client(dentry->d_sb); | 777 | struct ceph_client *client = ceph_sb_to_client(dentry->d_sb); |
| 783 | struct ceph_mds_client *mdsc = &client->mdsc; | 778 | struct ceph_mds_client *mdsc = &client->mdsc; |
| 784 | struct inode *inode = dentry->d_inode; | 779 | struct inode *inode = dentry->d_inode; |
| 785 | struct inode *parent_inode = dentry->d_parent->d_inode; | 780 | struct inode *parent_inode = dentry->d_parent->d_inode; |
