diff options
author | Yan, Zheng <zheng.z.yan@intel.com> | 2014-03-29 01:41:15 -0400 |
---|---|---|
committer | Sage Weil <sage@inktank.com> | 2014-04-05 00:08:22 -0400 |
commit | 54008399dc0ce511a07b87f1af3d1f5c791982a4 (patch) | |
tree | 6482779b43a6860debd35d11bc9a717efb262e20 | |
parent | 18cb95af2d7c69aa136ab13f02dd55188c120e75 (diff) |
ceph: preallocate buffer for readdir reply
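Preallocate the buffer for the readdir reply before the request is sent,
instead of allocating it while the reply is being parsed. Limit the number
of entries requested in the readdir reply according to the size of the
buffer that was actually allocated.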
Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
-rw-r--r-- | fs/ceph/dir.c | 10 | ||||
-rw-r--r-- | fs/ceph/mds_client.c | 66 | ||||
-rw-r--r-- | fs/ceph/mds_client.h | 4 |
3 files changed, 59 insertions, 21 deletions
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index ff2864a36a1c..46cd092cb013 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c | |||
@@ -252,8 +252,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) | |||
252 | int err; | 252 | int err; |
253 | u32 ftype; | 253 | u32 ftype; |
254 | struct ceph_mds_reply_info_parsed *rinfo; | 254 | struct ceph_mds_reply_info_parsed *rinfo; |
255 | const int max_entries = fsc->mount_options->max_readdir; | ||
256 | const int max_bytes = fsc->mount_options->max_readdir_bytes; | ||
257 | 255 | ||
258 | dout("readdir %p file %p frag %u off %u\n", inode, file, frag, off); | 256 | dout("readdir %p file %p frag %u off %u\n", inode, file, frag, off); |
259 | if (fi->flags & CEPH_F_ATEND) | 257 | if (fi->flags & CEPH_F_ATEND) |
@@ -327,6 +325,11 @@ more: | |||
327 | req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS); | 325 | req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS); |
328 | if (IS_ERR(req)) | 326 | if (IS_ERR(req)) |
329 | return PTR_ERR(req); | 327 | return PTR_ERR(req); |
328 | err = ceph_alloc_readdir_reply_buffer(req, inode); | ||
329 | if (err) { | ||
330 | ceph_mdsc_put_request(req); | ||
331 | return err; | ||
332 | } | ||
330 | req->r_inode = inode; | 333 | req->r_inode = inode; |
331 | ihold(inode); | 334 | ihold(inode); |
332 | req->r_dentry = dget(file->f_dentry); | 335 | req->r_dentry = dget(file->f_dentry); |
@@ -337,9 +340,6 @@ more: | |||
337 | req->r_path2 = kstrdup(fi->last_name, GFP_NOFS); | 340 | req->r_path2 = kstrdup(fi->last_name, GFP_NOFS); |
338 | req->r_readdir_offset = fi->next_offset; | 341 | req->r_readdir_offset = fi->next_offset; |
339 | req->r_args.readdir.frag = cpu_to_le32(frag); | 342 | req->r_args.readdir.frag = cpu_to_le32(frag); |
340 | req->r_args.readdir.max_entries = cpu_to_le32(max_entries); | ||
341 | req->r_args.readdir.max_bytes = cpu_to_le32(max_bytes); | ||
342 | req->r_num_caps = max_entries + 1; | ||
343 | err = ceph_mdsc_do_request(mdsc, NULL, req); | 343 | err = ceph_mdsc_do_request(mdsc, NULL, req); |
344 | if (err < 0) { | 344 | if (err < 0) { |
345 | ceph_mdsc_put_request(req); | 345 | ceph_mdsc_put_request(req); |
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 77640ada487a..19fbfc496137 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c | |||
@@ -3,6 +3,7 @@ | |||
3 | #include <linux/fs.h> | 3 | #include <linux/fs.h> |
4 | #include <linux/wait.h> | 4 | #include <linux/wait.h> |
5 | #include <linux/slab.h> | 5 | #include <linux/slab.h> |
6 | #include <linux/gfp.h> | ||
6 | #include <linux/sched.h> | 7 | #include <linux/sched.h> |
7 | #include <linux/debugfs.h> | 8 | #include <linux/debugfs.h> |
8 | #include <linux/seq_file.h> | 9 | #include <linux/seq_file.h> |
@@ -165,21 +166,18 @@ static int parse_reply_info_dir(void **p, void *end, | |||
165 | if (num == 0) | 166 | if (num == 0) |
166 | goto done; | 167 | goto done; |
167 | 168 | ||
168 | /* alloc large array */ | 169 | BUG_ON(!info->dir_in); |
169 | info->dir_nr = num; | ||
170 | info->dir_in = kcalloc(num, sizeof(*info->dir_in) + | ||
171 | sizeof(*info->dir_dname) + | ||
172 | sizeof(*info->dir_dname_len) + | ||
173 | sizeof(*info->dir_dlease), | ||
174 | GFP_NOFS); | ||
175 | if (info->dir_in == NULL) { | ||
176 | err = -ENOMEM; | ||
177 | goto out_bad; | ||
178 | } | ||
179 | info->dir_dname = (void *)(info->dir_in + num); | 170 | info->dir_dname = (void *)(info->dir_in + num); |
180 | info->dir_dname_len = (void *)(info->dir_dname + num); | 171 | info->dir_dname_len = (void *)(info->dir_dname + num); |
181 | info->dir_dlease = (void *)(info->dir_dname_len + num); | 172 | info->dir_dlease = (void *)(info->dir_dname_len + num); |
173 | if ((unsigned long)(info->dir_dlease + num) > | ||
174 | (unsigned long)info->dir_in + info->dir_buf_size) { | ||
175 | pr_err("dir contents are larger than expected\n"); | ||
176 | WARN_ON(1); | ||
177 | goto bad; | ||
178 | } | ||
182 | 179 | ||
180 | info->dir_nr = num; | ||
183 | while (num) { | 181 | while (num) { |
184 | /* dentry */ | 182 | /* dentry */ |
185 | ceph_decode_need(p, end, sizeof(u32)*2, bad); | 183 | ceph_decode_need(p, end, sizeof(u32)*2, bad); |
@@ -327,7 +325,9 @@ out_bad: | |||
327 | 325 | ||
328 | static void destroy_reply_info(struct ceph_mds_reply_info_parsed *info) | 326 | static void destroy_reply_info(struct ceph_mds_reply_info_parsed *info) |
329 | { | 327 | { |
330 | kfree(info->dir_in); | 328 | if (!info->dir_in) |
329 | return; | ||
330 | free_pages((unsigned long)info->dir_in, get_order(info->dir_buf_size)); | ||
331 | } | 331 | } |
332 | 332 | ||
333 | 333 | ||
@@ -512,12 +512,11 @@ void ceph_mdsc_release_request(struct kref *kref) | |||
512 | struct ceph_mds_request *req = container_of(kref, | 512 | struct ceph_mds_request *req = container_of(kref, |
513 | struct ceph_mds_request, | 513 | struct ceph_mds_request, |
514 | r_kref); | 514 | r_kref); |
515 | destroy_reply_info(&req->r_reply_info); | ||
515 | if (req->r_request) | 516 | if (req->r_request) |
516 | ceph_msg_put(req->r_request); | 517 | ceph_msg_put(req->r_request); |
517 | if (req->r_reply) { | 518 | if (req->r_reply) |
518 | ceph_msg_put(req->r_reply); | 519 | ceph_msg_put(req->r_reply); |
519 | destroy_reply_info(&req->r_reply_info); | ||
520 | } | ||
521 | if (req->r_inode) { | 520 | if (req->r_inode) { |
522 | ceph_put_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN); | 521 | ceph_put_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN); |
523 | iput(req->r_inode); | 522 | iput(req->r_inode); |
@@ -1496,6 +1495,43 @@ static void discard_cap_releases(struct ceph_mds_client *mdsc, | |||
1496 | * requests | 1495 | * requests |
1497 | */ | 1496 | */ |
1498 | 1497 | ||
1498 | int ceph_alloc_readdir_reply_buffer(struct ceph_mds_request *req, | ||
1499 | struct inode *dir) | ||
1500 | { | ||
1501 | struct ceph_inode_info *ci = ceph_inode(dir); | ||
1502 | struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info; | ||
1503 | struct ceph_mount_options *opt = req->r_mdsc->fsc->mount_options; | ||
1504 | size_t size = sizeof(*rinfo->dir_in) + sizeof(*rinfo->dir_dname_len) + | ||
1505 | sizeof(*rinfo->dir_dname) + sizeof(*rinfo->dir_dlease); | ||
1506 | int order, num_entries; | ||
1507 | |||
1508 | spin_lock(&ci->i_ceph_lock); | ||
1509 | num_entries = ci->i_files + ci->i_subdirs; | ||
1510 | spin_unlock(&ci->i_ceph_lock); | ||
1511 | num_entries = max(num_entries, 1); | ||
1512 | num_entries = min(num_entries, opt->max_readdir); | ||
1513 | |||
1514 | order = get_order(size * num_entries); | ||
1515 | while (order >= 0) { | ||
1516 | rinfo->dir_in = (void*)__get_free_pages(GFP_NOFS | __GFP_NOWARN, | ||
1517 | order); | ||
1518 | if (rinfo->dir_in) | ||
1519 | break; | ||
1520 | order--; | ||
1521 | } | ||
1522 | if (!rinfo->dir_in) | ||
1523 | return -ENOMEM; | ||
1524 | |||
1525 | num_entries = (PAGE_SIZE << order) / size; | ||
1526 | num_entries = min(num_entries, opt->max_readdir); | ||
1527 | |||
1528 | rinfo->dir_buf_size = PAGE_SIZE << order; | ||
1529 | req->r_num_caps = num_entries + 1; | ||
1530 | req->r_args.readdir.max_entries = cpu_to_le32(num_entries); | ||
1531 | req->r_args.readdir.max_bytes = cpu_to_le32(opt->max_readdir_bytes); | ||
1532 | return 0; | ||
1533 | } | ||
1534 | |||
1499 | /* | 1535 | /* |
1500 | * Create an mds request. | 1536 | * Create an mds request. |
1501 | */ | 1537 | */ |
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 68288917c737..e90cfccf93bd 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h | |||
@@ -67,6 +67,7 @@ struct ceph_mds_reply_info_parsed { | |||
67 | /* for readdir results */ | 67 | /* for readdir results */ |
68 | struct { | 68 | struct { |
69 | struct ceph_mds_reply_dirfrag *dir_dir; | 69 | struct ceph_mds_reply_dirfrag *dir_dir; |
70 | size_t dir_buf_size; | ||
70 | int dir_nr; | 71 | int dir_nr; |
71 | char **dir_dname; | 72 | char **dir_dname; |
72 | u32 *dir_dname_len; | 73 | u32 *dir_dname_len; |
@@ -346,7 +347,8 @@ extern void ceph_mdsc_lease_release(struct ceph_mds_client *mdsc, | |||
346 | struct dentry *dn); | 347 | struct dentry *dn); |
347 | 348 | ||
348 | extern void ceph_invalidate_dir_request(struct ceph_mds_request *req); | 349 | extern void ceph_invalidate_dir_request(struct ceph_mds_request *req); |
349 | 350 | extern int ceph_alloc_readdir_reply_buffer(struct ceph_mds_request *req, | |
351 | struct inode *dir); | ||
350 | extern struct ceph_mds_request * | 352 | extern struct ceph_mds_request * |
351 | ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode); | 353 | ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode); |
352 | extern void ceph_mdsc_submit_request(struct ceph_mds_client *mdsc, | 354 | extern void ceph_mdsc_submit_request(struct ceph_mds_client *mdsc, |