aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ceph
diff options
context:
space:
mode:
author: Sage Weil <sage@newdream.net> 2010-12-14 20:37:52 -0500
committer: Sage Weil <sage@newdream.net> 2011-01-12 18:15:13 -0500
commit: 14303d20f3ae3e6ab626c77a4aac202b3bafd377 (patch)
tree: ee93c0de758571721f594c35f556ca79468e684f /fs/ceph
parent: 6c0f3af72cb1622a66962a1180c36ef8c41be8e2 (diff)
ceph: implement DIRLAYOUTHASH feature to get dir layout from MDS
This implements the DIRLAYOUTHASH protocol feature, which passes the dir layout over the wire from the MDS. This gives the client knowledge of the correct hash function to use for mapping dentries among dir fragments.

Note that if this feature is _not_ present on the client but is on the MDS, the client may misdirect requests. A misdirected request will then be forwarded, which degrades performance. It may also result in inaccurate NFS filehandle generation, which will prevent fh resolution when the inode is not present in the client cache and the parent directories have been fragmented.

Signed-off-by: Sage Weil <sage@newdream.net>
Diffstat (limited to 'fs/ceph')
-rw-r--r-- fs/ceph/inode.c 2
-rw-r--r-- fs/ceph/mds_client.c 42
-rw-r--r-- fs/ceph/mds_client.h 1
-rw-r--r-- fs/ceph/super.c 3
4 files changed, 32 insertions, 16 deletions
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 045283ce4413..e791fa34b23d 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -682,6 +682,8 @@ static int fill_inode(struct inode *inode,
682 inode->i_op = &ceph_dir_iops; 682 inode->i_op = &ceph_dir_iops;
683 inode->i_fop = &ceph_dir_fops; 683 inode->i_fop = &ceph_dir_fops;
684 684
685 ci->i_dir_layout = iinfo->dir_layout;
686
685 ci->i_files = le64_to_cpu(info->files); 687 ci->i_files = le64_to_cpu(info->files);
686 ci->i_subdirs = le64_to_cpu(info->subdirs); 688 ci->i_subdirs = le64_to_cpu(info->subdirs);
687 ci->i_rbytes = le64_to_cpu(info->rbytes); 689 ci->i_rbytes = le64_to_cpu(info->rbytes);
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 38800eaa81d0..9be29b06a2d9 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -60,7 +60,8 @@ static const struct ceph_connection_operations mds_con_ops;
60 * parse individual inode info 60 * parse individual inode info
61 */ 61 */
62static int parse_reply_info_in(void **p, void *end, 62static int parse_reply_info_in(void **p, void *end,
63 struct ceph_mds_reply_info_in *info) 63 struct ceph_mds_reply_info_in *info,
64 int features)
64{ 65{
65 int err = -EIO; 66 int err = -EIO;
66 67
@@ -74,6 +75,12 @@ static int parse_reply_info_in(void **p, void *end,
74 info->symlink = *p; 75 info->symlink = *p;
75 *p += info->symlink_len; 76 *p += info->symlink_len;
76 77
78 if (features & CEPH_FEATURE_DIRLAYOUTHASH)
79 ceph_decode_copy_safe(p, end, &info->dir_layout,
80 sizeof(info->dir_layout), bad);
81 else
82 memset(&info->dir_layout, 0, sizeof(info->dir_layout));
83
77 ceph_decode_32_safe(p, end, info->xattr_len, bad); 84 ceph_decode_32_safe(p, end, info->xattr_len, bad);
78 ceph_decode_need(p, end, info->xattr_len, bad); 85 ceph_decode_need(p, end, info->xattr_len, bad);
79 info->xattr_data = *p; 86 info->xattr_data = *p;
@@ -88,12 +95,13 @@ bad:
88 * target inode. 95 * target inode.
89 */ 96 */
90static int parse_reply_info_trace(void **p, void *end, 97static int parse_reply_info_trace(void **p, void *end,
91 struct ceph_mds_reply_info_parsed *info) 98 struct ceph_mds_reply_info_parsed *info,
99 int features)
92{ 100{
93 int err; 101 int err;
94 102
95 if (info->head->is_dentry) { 103 if (info->head->is_dentry) {
96 err = parse_reply_info_in(p, end, &info->diri); 104 err = parse_reply_info_in(p, end, &info->diri, features);
97 if (err < 0) 105 if (err < 0)
98 goto out_bad; 106 goto out_bad;
99 107
@@ -114,7 +122,7 @@ static int parse_reply_info_trace(void **p, void *end,
114 } 122 }
115 123
116 if (info->head->is_target) { 124 if (info->head->is_target) {
117 err = parse_reply_info_in(p, end, &info->targeti); 125 err = parse_reply_info_in(p, end, &info->targeti, features);
118 if (err < 0) 126 if (err < 0)
119 goto out_bad; 127 goto out_bad;
120 } 128 }
@@ -134,7 +142,8 @@ out_bad:
134 * parse readdir results 142 * parse readdir results
135 */ 143 */
136static int parse_reply_info_dir(void **p, void *end, 144static int parse_reply_info_dir(void **p, void *end,
137 struct ceph_mds_reply_info_parsed *info) 145 struct ceph_mds_reply_info_parsed *info,
146 int features)
138{ 147{
139 u32 num, i = 0; 148 u32 num, i = 0;
140 int err; 149 int err;
@@ -182,7 +191,7 @@ static int parse_reply_info_dir(void **p, void *end,
182 *p += sizeof(struct ceph_mds_reply_lease); 191 *p += sizeof(struct ceph_mds_reply_lease);
183 192
184 /* inode */ 193 /* inode */
185 err = parse_reply_info_in(p, end, &info->dir_in[i]); 194 err = parse_reply_info_in(p, end, &info->dir_in[i], features);
186 if (err < 0) 195 if (err < 0)
187 goto out_bad; 196 goto out_bad;
188 i++; 197 i++;
@@ -205,7 +214,8 @@ out_bad:
205 * parse fcntl F_GETLK results 214 * parse fcntl F_GETLK results
206 */ 215 */
207static int parse_reply_info_filelock(void **p, void *end, 216static int parse_reply_info_filelock(void **p, void *end,
208 struct ceph_mds_reply_info_parsed *info) 217 struct ceph_mds_reply_info_parsed *info,
218 int features)
209{ 219{
210 if (*p + sizeof(*info->filelock_reply) > end) 220 if (*p + sizeof(*info->filelock_reply) > end)
211 goto bad; 221 goto bad;
@@ -225,19 +235,21 @@ bad:
225 * parse extra results 235 * parse extra results
226 */ 236 */
227static int parse_reply_info_extra(void **p, void *end, 237static int parse_reply_info_extra(void **p, void *end,
228 struct ceph_mds_reply_info_parsed *info) 238 struct ceph_mds_reply_info_parsed *info,
239 int features)
229{ 240{
230 if (info->head->op == CEPH_MDS_OP_GETFILELOCK) 241 if (info->head->op == CEPH_MDS_OP_GETFILELOCK)
231 return parse_reply_info_filelock(p, end, info); 242 return parse_reply_info_filelock(p, end, info, features);
232 else 243 else
233 return parse_reply_info_dir(p, end, info); 244 return parse_reply_info_dir(p, end, info, features);
234} 245}
235 246
236/* 247/*
237 * parse entire mds reply 248 * parse entire mds reply
238 */ 249 */
239static int parse_reply_info(struct ceph_msg *msg, 250static int parse_reply_info(struct ceph_msg *msg,
240 struct ceph_mds_reply_info_parsed *info) 251 struct ceph_mds_reply_info_parsed *info,
252 int features)
241{ 253{
242 void *p, *end; 254 void *p, *end;
243 u32 len; 255 u32 len;
@@ -250,7 +262,7 @@ static int parse_reply_info(struct ceph_msg *msg,
250 /* trace */ 262 /* trace */
251 ceph_decode_32_safe(&p, end, len, bad); 263 ceph_decode_32_safe(&p, end, len, bad);
252 if (len > 0) { 264 if (len > 0) {
253 err = parse_reply_info_trace(&p, p+len, info); 265 err = parse_reply_info_trace(&p, p+len, info, features);
254 if (err < 0) 266 if (err < 0)
255 goto out_bad; 267 goto out_bad;
256 } 268 }
@@ -258,7 +270,7 @@ static int parse_reply_info(struct ceph_msg *msg,
258 /* extra */ 270 /* extra */
259 ceph_decode_32_safe(&p, end, len, bad); 271 ceph_decode_32_safe(&p, end, len, bad);
260 if (len > 0) { 272 if (len > 0) {
261 err = parse_reply_info_extra(&p, p+len, info); 273 err = parse_reply_info_extra(&p, p+len, info, features);
262 if (err < 0) 274 if (err < 0)
263 goto out_bad; 275 goto out_bad;
264 } 276 }
@@ -654,7 +666,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
654 } else { 666 } else {
655 /* dir + name */ 667 /* dir + name */
656 inode = dir; 668 inode = dir;
657 hash = req->r_dentry->d_name.hash; 669 hash = ceph_dentry_hash(req->r_dentry);
658 is_hash = true; 670 is_hash = true;
659 } 671 }
660 } 672 }
@@ -2101,7 +2113,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
2101 2113
2102 dout("handle_reply tid %lld result %d\n", tid, result); 2114 dout("handle_reply tid %lld result %d\n", tid, result);
2103 rinfo = &req->r_reply_info; 2115 rinfo = &req->r_reply_info;
2104 err = parse_reply_info(msg, rinfo); 2116 err = parse_reply_info(msg, rinfo, session->s_con.peer_features);
2105 mutex_unlock(&mdsc->mutex); 2117 mutex_unlock(&mdsc->mutex);
2106 2118
2107 mutex_lock(&session->s_mutex); 2119 mutex_lock(&session->s_mutex);
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index aabe563b54db..f8f27f6eaa90 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -35,6 +35,7 @@ struct ceph_cap;
35 */ 35 */
36struct ceph_mds_reply_info_in { 36struct ceph_mds_reply_info_in {
37 struct ceph_mds_reply_inode *in; 37 struct ceph_mds_reply_inode *in;
38 struct ceph_dir_layout dir_layout;
38 u32 symlink_len; 39 u32 symlink_len;
39 char *symlink; 40 char *symlink;
40 u32 xattr_len; 41 u32 xattr_len;
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 08b460ae0539..1417f3f3e246 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -428,7 +428,8 @@ struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
428 goto fail; 428 goto fail;
429 } 429 }
430 fsc->client->extra_mon_dispatch = extra_mon_dispatch; 430 fsc->client->extra_mon_dispatch = extra_mon_dispatch;
431 fsc->client->supported_features |= CEPH_FEATURE_FLOCK; 431 fsc->client->supported_features |= CEPH_FEATURE_FLOCK |
432 CEPH_FEATURE_DIRLAYOUTHASH;
432 fsc->client->monc.want_mdsmap = 1; 433 fsc->client->monc.want_mdsmap = 1;
433 434
434 fsc->mount_options = fsopt; 435 fsc->mount_options = fsopt;