diff options
author | Sage Weil <sage@newdream.net> | 2010-12-14 20:37:52 -0500 |
---|---|---|
committer | Sage Weil <sage@newdream.net> | 2011-01-12 18:15:13 -0500 |
commit | 14303d20f3ae3e6ab626c77a4aac202b3bafd377 (patch) | |
tree | ee93c0de758571721f594c35f556ca79468e684f /fs | |
parent | 6c0f3af72cb1622a66962a1180c36ef8c41be8e2 (diff) |
ceph: implement DIRLAYOUTHASH feature to get dir layout from MDS
This implements the DIRLAYOUTHASH protocol feature, which passes the dir
layout over the wire from the MDS. This gives the client knowledge
of the correct hash function to use for mapping dentries among dir
fragments.
Note that if this feature is _not_ present on the client but is on the
MDS, the client may misdirect requests. This will result in the request
being forwarded and will degrade performance. It may also result in inaccurate NFS filehandle
generation, which will prevent fh resolution when the inode is not present
in the client cache and the parent directories have been fragmented.
Signed-off-by: Sage Weil <sage@newdream.net>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/ceph/inode.c | 2 | ||||
-rw-r--r-- | fs/ceph/mds_client.c | 42 | ||||
-rw-r--r-- | fs/ceph/mds_client.h | 1 | ||||
-rw-r--r-- | fs/ceph/super.c | 3 |
4 files changed, 32 insertions, 16 deletions
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 045283ce4413..e791fa34b23d 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c | |||
@@ -682,6 +682,8 @@ static int fill_inode(struct inode *inode, | |||
682 | inode->i_op = &ceph_dir_iops; | 682 | inode->i_op = &ceph_dir_iops; |
683 | inode->i_fop = &ceph_dir_fops; | 683 | inode->i_fop = &ceph_dir_fops; |
684 | 684 | ||
685 | ci->i_dir_layout = iinfo->dir_layout; | ||
686 | |||
685 | ci->i_files = le64_to_cpu(info->files); | 687 | ci->i_files = le64_to_cpu(info->files); |
686 | ci->i_subdirs = le64_to_cpu(info->subdirs); | 688 | ci->i_subdirs = le64_to_cpu(info->subdirs); |
687 | ci->i_rbytes = le64_to_cpu(info->rbytes); | 689 | ci->i_rbytes = le64_to_cpu(info->rbytes); |
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 38800eaa81d0..9be29b06a2d9 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c | |||
@@ -60,7 +60,8 @@ static const struct ceph_connection_operations mds_con_ops; | |||
60 | * parse individual inode info | 60 | * parse individual inode info |
61 | */ | 61 | */ |
62 | static int parse_reply_info_in(void **p, void *end, | 62 | static int parse_reply_info_in(void **p, void *end, |
63 | struct ceph_mds_reply_info_in *info) | 63 | struct ceph_mds_reply_info_in *info, |
64 | int features) | ||
64 | { | 65 | { |
65 | int err = -EIO; | 66 | int err = -EIO; |
66 | 67 | ||
@@ -74,6 +75,12 @@ static int parse_reply_info_in(void **p, void *end, | |||
74 | info->symlink = *p; | 75 | info->symlink = *p; |
75 | *p += info->symlink_len; | 76 | *p += info->symlink_len; |
76 | 77 | ||
78 | if (features & CEPH_FEATURE_DIRLAYOUTHASH) | ||
79 | ceph_decode_copy_safe(p, end, &info->dir_layout, | ||
80 | sizeof(info->dir_layout), bad); | ||
81 | else | ||
82 | memset(&info->dir_layout, 0, sizeof(info->dir_layout)); | ||
83 | |||
77 | ceph_decode_32_safe(p, end, info->xattr_len, bad); | 84 | ceph_decode_32_safe(p, end, info->xattr_len, bad); |
78 | ceph_decode_need(p, end, info->xattr_len, bad); | 85 | ceph_decode_need(p, end, info->xattr_len, bad); |
79 | info->xattr_data = *p; | 86 | info->xattr_data = *p; |
@@ -88,12 +95,13 @@ bad: | |||
88 | * target inode. | 95 | * target inode. |
89 | */ | 96 | */ |
90 | static int parse_reply_info_trace(void **p, void *end, | 97 | static int parse_reply_info_trace(void **p, void *end, |
91 | struct ceph_mds_reply_info_parsed *info) | 98 | struct ceph_mds_reply_info_parsed *info, |
99 | int features) | ||
92 | { | 100 | { |
93 | int err; | 101 | int err; |
94 | 102 | ||
95 | if (info->head->is_dentry) { | 103 | if (info->head->is_dentry) { |
96 | err = parse_reply_info_in(p, end, &info->diri); | 104 | err = parse_reply_info_in(p, end, &info->diri, features); |
97 | if (err < 0) | 105 | if (err < 0) |
98 | goto out_bad; | 106 | goto out_bad; |
99 | 107 | ||
@@ -114,7 +122,7 @@ static int parse_reply_info_trace(void **p, void *end, | |||
114 | } | 122 | } |
115 | 123 | ||
116 | if (info->head->is_target) { | 124 | if (info->head->is_target) { |
117 | err = parse_reply_info_in(p, end, &info->targeti); | 125 | err = parse_reply_info_in(p, end, &info->targeti, features); |
118 | if (err < 0) | 126 | if (err < 0) |
119 | goto out_bad; | 127 | goto out_bad; |
120 | } | 128 | } |
@@ -134,7 +142,8 @@ out_bad: | |||
134 | * parse readdir results | 142 | * parse readdir results |
135 | */ | 143 | */ |
136 | static int parse_reply_info_dir(void **p, void *end, | 144 | static int parse_reply_info_dir(void **p, void *end, |
137 | struct ceph_mds_reply_info_parsed *info) | 145 | struct ceph_mds_reply_info_parsed *info, |
146 | int features) | ||
138 | { | 147 | { |
139 | u32 num, i = 0; | 148 | u32 num, i = 0; |
140 | int err; | 149 | int err; |
@@ -182,7 +191,7 @@ static int parse_reply_info_dir(void **p, void *end, | |||
182 | *p += sizeof(struct ceph_mds_reply_lease); | 191 | *p += sizeof(struct ceph_mds_reply_lease); |
183 | 192 | ||
184 | /* inode */ | 193 | /* inode */ |
185 | err = parse_reply_info_in(p, end, &info->dir_in[i]); | 194 | err = parse_reply_info_in(p, end, &info->dir_in[i], features); |
186 | if (err < 0) | 195 | if (err < 0) |
187 | goto out_bad; | 196 | goto out_bad; |
188 | i++; | 197 | i++; |
@@ -205,7 +214,8 @@ out_bad: | |||
205 | * parse fcntl F_GETLK results | 214 | * parse fcntl F_GETLK results |
206 | */ | 215 | */ |
207 | static int parse_reply_info_filelock(void **p, void *end, | 216 | static int parse_reply_info_filelock(void **p, void *end, |
208 | struct ceph_mds_reply_info_parsed *info) | 217 | struct ceph_mds_reply_info_parsed *info, |
218 | int features) | ||
209 | { | 219 | { |
210 | if (*p + sizeof(*info->filelock_reply) > end) | 220 | if (*p + sizeof(*info->filelock_reply) > end) |
211 | goto bad; | 221 | goto bad; |
@@ -225,19 +235,21 @@ bad: | |||
225 | * parse extra results | 235 | * parse extra results |
226 | */ | 236 | */ |
227 | static int parse_reply_info_extra(void **p, void *end, | 237 | static int parse_reply_info_extra(void **p, void *end, |
228 | struct ceph_mds_reply_info_parsed *info) | 238 | struct ceph_mds_reply_info_parsed *info, |
239 | int features) | ||
229 | { | 240 | { |
230 | if (info->head->op == CEPH_MDS_OP_GETFILELOCK) | 241 | if (info->head->op == CEPH_MDS_OP_GETFILELOCK) |
231 | return parse_reply_info_filelock(p, end, info); | 242 | return parse_reply_info_filelock(p, end, info, features); |
232 | else | 243 | else |
233 | return parse_reply_info_dir(p, end, info); | 244 | return parse_reply_info_dir(p, end, info, features); |
234 | } | 245 | } |
235 | 246 | ||
236 | /* | 247 | /* |
237 | * parse entire mds reply | 248 | * parse entire mds reply |
238 | */ | 249 | */ |
239 | static int parse_reply_info(struct ceph_msg *msg, | 250 | static int parse_reply_info(struct ceph_msg *msg, |
240 | struct ceph_mds_reply_info_parsed *info) | 251 | struct ceph_mds_reply_info_parsed *info, |
252 | int features) | ||
241 | { | 253 | { |
242 | void *p, *end; | 254 | void *p, *end; |
243 | u32 len; | 255 | u32 len; |
@@ -250,7 +262,7 @@ static int parse_reply_info(struct ceph_msg *msg, | |||
250 | /* trace */ | 262 | /* trace */ |
251 | ceph_decode_32_safe(&p, end, len, bad); | 263 | ceph_decode_32_safe(&p, end, len, bad); |
252 | if (len > 0) { | 264 | if (len > 0) { |
253 | err = parse_reply_info_trace(&p, p+len, info); | 265 | err = parse_reply_info_trace(&p, p+len, info, features); |
254 | if (err < 0) | 266 | if (err < 0) |
255 | goto out_bad; | 267 | goto out_bad; |
256 | } | 268 | } |
@@ -258,7 +270,7 @@ static int parse_reply_info(struct ceph_msg *msg, | |||
258 | /* extra */ | 270 | /* extra */ |
259 | ceph_decode_32_safe(&p, end, len, bad); | 271 | ceph_decode_32_safe(&p, end, len, bad); |
260 | if (len > 0) { | 272 | if (len > 0) { |
261 | err = parse_reply_info_extra(&p, p+len, info); | 273 | err = parse_reply_info_extra(&p, p+len, info, features); |
262 | if (err < 0) | 274 | if (err < 0) |
263 | goto out_bad; | 275 | goto out_bad; |
264 | } | 276 | } |
@@ -654,7 +666,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc, | |||
654 | } else { | 666 | } else { |
655 | /* dir + name */ | 667 | /* dir + name */ |
656 | inode = dir; | 668 | inode = dir; |
657 | hash = req->r_dentry->d_name.hash; | 669 | hash = ceph_dentry_hash(req->r_dentry); |
658 | is_hash = true; | 670 | is_hash = true; |
659 | } | 671 | } |
660 | } | 672 | } |
@@ -2101,7 +2113,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) | |||
2101 | 2113 | ||
2102 | dout("handle_reply tid %lld result %d\n", tid, result); | 2114 | dout("handle_reply tid %lld result %d\n", tid, result); |
2103 | rinfo = &req->r_reply_info; | 2115 | rinfo = &req->r_reply_info; |
2104 | err = parse_reply_info(msg, rinfo); | 2116 | err = parse_reply_info(msg, rinfo, session->s_con.peer_features); |
2105 | mutex_unlock(&mdsc->mutex); | 2117 | mutex_unlock(&mdsc->mutex); |
2106 | 2118 | ||
2107 | mutex_lock(&session->s_mutex); | 2119 | mutex_lock(&session->s_mutex); |
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index aabe563b54db..f8f27f6eaa90 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h | |||
@@ -35,6 +35,7 @@ struct ceph_cap; | |||
35 | */ | 35 | */ |
36 | struct ceph_mds_reply_info_in { | 36 | struct ceph_mds_reply_info_in { |
37 | struct ceph_mds_reply_inode *in; | 37 | struct ceph_mds_reply_inode *in; |
38 | struct ceph_dir_layout dir_layout; | ||
38 | u32 symlink_len; | 39 | u32 symlink_len; |
39 | char *symlink; | 40 | char *symlink; |
40 | u32 xattr_len; | 41 | u32 xattr_len; |
diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 08b460ae0539..1417f3f3e246 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c | |||
@@ -428,7 +428,8 @@ struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt, | |||
428 | goto fail; | 428 | goto fail; |
429 | } | 429 | } |
430 | fsc->client->extra_mon_dispatch = extra_mon_dispatch; | 430 | fsc->client->extra_mon_dispatch = extra_mon_dispatch; |
431 | fsc->client->supported_features |= CEPH_FEATURE_FLOCK; | 431 | fsc->client->supported_features |= CEPH_FEATURE_FLOCK | |
432 | CEPH_FEATURE_DIRLAYOUTHASH; | ||
432 | fsc->client->monc.want_mdsmap = 1; | 433 | fsc->client->monc.want_mdsmap = 1; |
433 | 434 | ||
434 | fsc->mount_options = fsopt; | 435 | fsc->mount_options = fsopt; |