aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorYan, Zheng <zyan@redhat.com>2017-04-05 12:54:05 -0400
committerIlya Dryomov <idryomov@gmail.com>2017-05-04 03:19:20 -0400
commit79162547b76e4979b21ef80c9629ada94a51a59b (patch)
tree2af22d86df9675d23fd500fc089f512ab958cabb
parent2827528da003ad207930f0d1af5faf3e482d6393 (diff)
ceph: make seeky readdir more efficient
The current cephfs client uses a string to indicate the start position of readdir. The string is the last entry of the previous readdir reply. This approach does not work for seeky readdir because we cannot easily convert the new position to a string. For seeky readdir, the mds needs to return dentries from the beginning, and the client keeps retrying if the reply does not contain the dentry it wants. In the current version of ceph, the mds sorts CDentry in its cache in hash order. The client also uses the dentry hash to compose the dir position. For seeky readdir, if the client passes the hash part of the dir position to the mds, the mds can avoid replying with useless dentries. Signed-off-by: "Yan, Zheng" <zyan@redhat.com> Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
-rw-r--r--fs/ceph/dir.c4
-rw-r--r--fs/ceph/inode.c17
-rw-r--r--fs/ceph/mds_client.c1
-rw-r--r--fs/ceph/mds_client.h3
-rw-r--r--include/linux/ceph/ceph_fs.h2
5 files changed, 21 insertions, 6 deletions
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 3e9ad501addf..ae61cdf7d489 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -378,7 +378,11 @@ more:
378 ceph_mdsc_put_request(req); 378 ceph_mdsc_put_request(req);
379 return -ENOMEM; 379 return -ENOMEM;
380 } 380 }
381 } else if (is_hash_order(ctx->pos)) {
382 req->r_args.readdir.offset_hash =
383 cpu_to_le32(fpos_hash(ctx->pos));
381 } 384 }
385
382 req->r_dir_release_cnt = fi->dir_release_count; 386 req->r_dir_release_cnt = fi->dir_release_count;
383 req->r_dir_ordered_cnt = fi->dir_ordered_count; 387 req->r_dir_ordered_cnt = fi->dir_ordered_count;
384 req->r_readdir_cache_idx = fi->readdir_cache_idx; 388 req->r_readdir_cache_idx = fi->readdir_cache_idx;
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index d3119fe3ab45..dcce79b84406 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -1482,10 +1482,17 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
1482 if (test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) 1482 if (test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags))
1483 return readdir_prepopulate_inodes_only(req, session); 1483 return readdir_prepopulate_inodes_only(req, session);
1484 1484
1485 if (rinfo->hash_order && req->r_path2) { 1485 if (rinfo->hash_order) {
1486 last_hash = ceph_str_hash(ci->i_dir_layout.dl_dir_hash, 1486 if (req->r_path2) {
1487 req->r_path2, strlen(req->r_path2)); 1487 last_hash = ceph_str_hash(ci->i_dir_layout.dl_dir_hash,
1488 last_hash = ceph_frag_value(last_hash); 1488 req->r_path2,
1489 strlen(req->r_path2));
1490 last_hash = ceph_frag_value(last_hash);
1491 } else if (rinfo->offset_hash) {
1492 /* mds understands offset_hash */
1493 WARN_ON_ONCE(req->r_readdir_offset != 2);
1494 last_hash = le32_to_cpu(rhead->args.readdir.offset_hash);
1495 }
1489 } 1496 }
1490 1497
1491 if (rinfo->dir_dir && 1498 if (rinfo->dir_dir &&
@@ -1510,7 +1517,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
1510 } 1517 }
1511 1518
1512 if (ceph_frag_is_leftmost(frag) && req->r_readdir_offset == 2 && 1519 if (ceph_frag_is_leftmost(frag) && req->r_readdir_offset == 2 &&
1513 !(rinfo->hash_order && req->r_path2)) { 1520 !(rinfo->hash_order && last_hash)) {
1514 /* note dir version at start of readdir so we can tell 1521 /* note dir version at start of readdir so we can tell
1515 * if any dentries get dropped */ 1522 * if any dentries get dropped */
1516 req->r_dir_release_cnt = atomic64_read(&ci->i_release_count); 1523 req->r_dir_release_cnt = atomic64_read(&ci->i_release_count);
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index a22688873ec3..8cc4d4e8b077 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -189,6 +189,7 @@ static int parse_reply_info_dir(void **p, void *end,
189 info->dir_end = !!(flags & CEPH_READDIR_FRAG_END); 189 info->dir_end = !!(flags & CEPH_READDIR_FRAG_END);
190 info->dir_complete = !!(flags & CEPH_READDIR_FRAG_COMPLETE); 190 info->dir_complete = !!(flags & CEPH_READDIR_FRAG_COMPLETE);
191 info->hash_order = !!(flags & CEPH_READDIR_HASH_ORDER); 191 info->hash_order = !!(flags & CEPH_READDIR_HASH_ORDER);
192 info->offset_hash = !!(flags & CEPH_READDIR_OFFSET_HASH);
192 } 193 }
193 if (num == 0) 194 if (num == 0)
194 goto done; 195 goto done;
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index bbebcd55d79e..3e67dd2169fa 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -83,9 +83,10 @@ struct ceph_mds_reply_info_parsed {
83 struct ceph_mds_reply_dirfrag *dir_dir; 83 struct ceph_mds_reply_dirfrag *dir_dir;
84 size_t dir_buf_size; 84 size_t dir_buf_size;
85 int dir_nr; 85 int dir_nr;
86 bool dir_complete;
87 bool dir_end; 86 bool dir_end;
87 bool dir_complete;
88 bool hash_order; 88 bool hash_order;
89 bool offset_hash;
89 struct ceph_mds_reply_dir_entry *dir_entries; 90 struct ceph_mds_reply_dir_entry *dir_entries;
90 }; 91 };
91 92
diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
index f4b2ee18f38c..1787e4a8e251 100644
--- a/include/linux/ceph/ceph_fs.h
+++ b/include/linux/ceph/ceph_fs.h
@@ -365,6 +365,7 @@ extern const char *ceph_mds_op_name(int op);
365#define CEPH_READDIR_FRAG_END (1<<0) 365#define CEPH_READDIR_FRAG_END (1<<0)
366#define CEPH_READDIR_FRAG_COMPLETE (1<<8) 366#define CEPH_READDIR_FRAG_COMPLETE (1<<8)
367#define CEPH_READDIR_HASH_ORDER (1<<9) 367#define CEPH_READDIR_HASH_ORDER (1<<9)
368#define CEPH_READDIR_OFFSET_HASH (1<<10)
368 369
369union ceph_mds_request_args { 370union ceph_mds_request_args {
370 struct { 371 struct {
@@ -384,6 +385,7 @@ union ceph_mds_request_args {
384 __le32 max_entries; /* how many dentries to grab */ 385 __le32 max_entries; /* how many dentries to grab */
385 __le32 max_bytes; 386 __le32 max_bytes;
386 __le16 flags; 387 __le16 flags;
388 __le32 offset_hash;
387 } __attribute__ ((packed)) readdir; 389 } __attribute__ ((packed)) readdir;
388 struct { 390 struct {
389 __le32 mode; 391 __le32 mode;