diff options
author | Sage Weil <sage@newdream.net> | 2010-02-15 15:08:46 -0500 |
---|---|---|
committer | Sage Weil <sage@newdream.net> | 2010-02-17 01:01:08 -0500 |
commit | 44ca18f2682eb1cfbed153849adedb79e3e19790 (patch) | |
tree | 063dd8382179e65717ec10dc1ffc135950050abd /fs/ceph | |
parent | 91e45ce38946a8efa21fefbc65d023ca3c0b434f (diff) |
ceph: use rbtree for mds requests
The rbtree is a more appropriate data structure than a radix_tree. It
avoids extra memory usage and simplifies the code.
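It also fixes a bug where the debugfs 'mdsc' file wasn't including the
most recent mds request: the old loop only ran while nexttid < last_tid,
so it could exit before reaching the request whose tid equals last_tid.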
Signed-off-by: Sage Weil <sage@newdream.net>
Diffstat (limited to 'fs/ceph')
-rw-r--r-- | fs/ceph/debugfs.c | 13 | ||||
-rw-r--r-- | fs/ceph/mds_client.c | 149 | ||||
-rw-r--r-- | fs/ceph/mds_client.h | 4 |
3 files changed, 97 insertions, 69 deletions
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index fba44b2a6086..cd5dd805e4be 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c | |||
@@ -142,21 +142,16 @@ static int monc_show(struct seq_file *s, void *p) | |||
142 | static int mdsc_show(struct seq_file *s, void *p) | 142 | static int mdsc_show(struct seq_file *s, void *p) |
143 | { | 143 | { |
144 | struct ceph_client *client = s->private; | 144 | struct ceph_client *client = s->private; |
145 | struct ceph_mds_request *req; | ||
146 | u64 nexttid = 0; | ||
147 | int got; | ||
148 | struct ceph_mds_client *mdsc = &client->mdsc; | 145 | struct ceph_mds_client *mdsc = &client->mdsc; |
146 | struct ceph_mds_request *req; | ||
147 | struct rb_node *rp; | ||
149 | int pathlen; | 148 | int pathlen; |
150 | u64 pathbase; | 149 | u64 pathbase; |
151 | char *path; | 150 | char *path; |
152 | 151 | ||
153 | mutex_lock(&mdsc->mutex); | 152 | mutex_lock(&mdsc->mutex); |
154 | while (nexttid < mdsc->last_tid) { | 153 | for (rp = rb_first(&mdsc->request_tree); rp; rp = rb_next(rp)) { |
155 | got = radix_tree_gang_lookup(&mdsc->request_tree, | 154 | req = rb_entry(rp, struct ceph_mds_request, r_node); |
156 | (void **)&req, nexttid, 1); | ||
157 | if (got == 0) | ||
158 | break; | ||
159 | nexttid = req->r_tid + 1; | ||
160 | 155 | ||
161 | if (req->r_request) | 156 | if (req->r_request) |
162 | seq_printf(s, "%lld\tmds%d\t", req->r_tid, req->r_mds); | 157 | seq_printf(s, "%lld\tmds%d\t", req->r_tid, req->r_mds); |
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index aa8506bad42d..81840d6b68a4 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c | |||
@@ -255,6 +255,7 @@ static const char *session_state_name(int s) | |||
255 | case CEPH_MDS_SESSION_OPEN: return "open"; | 255 | case CEPH_MDS_SESSION_OPEN: return "open"; |
256 | case CEPH_MDS_SESSION_HUNG: return "hung"; | 256 | case CEPH_MDS_SESSION_HUNG: return "hung"; |
257 | case CEPH_MDS_SESSION_CLOSING: return "closing"; | 257 | case CEPH_MDS_SESSION_CLOSING: return "closing"; |
258 | case CEPH_MDS_SESSION_RESTARTING: return "restarting"; | ||
258 | case CEPH_MDS_SESSION_RECONNECTING: return "reconnecting"; | 259 | case CEPH_MDS_SESSION_RECONNECTING: return "reconnecting"; |
259 | default: return "???"; | 260 | default: return "???"; |
260 | } | 261 | } |
@@ -448,10 +449,42 @@ static struct ceph_mds_request *__lookup_request(struct ceph_mds_client *mdsc, | |||
448 | u64 tid) | 449 | u64 tid) |
449 | { | 450 | { |
450 | struct ceph_mds_request *req; | 451 | struct ceph_mds_request *req; |
451 | req = radix_tree_lookup(&mdsc->request_tree, tid); | 452 | struct rb_node *n = mdsc->request_tree.rb_node; |
452 | if (req) | 453 | |
453 | ceph_mdsc_get_request(req); | 454 | while (n) { |
454 | return req; | 455 | req = rb_entry(n, struct ceph_mds_request, r_node); |
456 | if (tid < req->r_tid) | ||
457 | n = n->rb_left; | ||
458 | else if (tid > req->r_tid) | ||
459 | n = n->rb_right; | ||
460 | else { | ||
461 | ceph_mdsc_get_request(req); | ||
462 | return req; | ||
463 | } | ||
464 | } | ||
465 | return NULL; | ||
466 | } | ||
467 | |||
468 | static void __insert_request(struct ceph_mds_client *mdsc, | ||
469 | struct ceph_mds_request *new) | ||
470 | { | ||
471 | struct rb_node **p = &mdsc->request_tree.rb_node; | ||
472 | struct rb_node *parent = NULL; | ||
473 | struct ceph_mds_request *req = NULL; | ||
474 | |||
475 | while (*p) { | ||
476 | parent = *p; | ||
477 | req = rb_entry(parent, struct ceph_mds_request, r_node); | ||
478 | if (new->r_tid < req->r_tid) | ||
479 | p = &(*p)->rb_left; | ||
480 | else if (new->r_tid > req->r_tid) | ||
481 | p = &(*p)->rb_right; | ||
482 | else | ||
483 | BUG(); | ||
484 | } | ||
485 | |||
486 | rb_link_node(&new->r_node, parent, p); | ||
487 | rb_insert_color(&new->r_node, &mdsc->request_tree); | ||
455 | } | 488 | } |
456 | 489 | ||
457 | /* | 490 | /* |
@@ -469,7 +502,7 @@ static void __register_request(struct ceph_mds_client *mdsc, | |||
469 | ceph_reserve_caps(&req->r_caps_reservation, req->r_num_caps); | 502 | ceph_reserve_caps(&req->r_caps_reservation, req->r_num_caps); |
470 | dout("__register_request %p tid %lld\n", req, req->r_tid); | 503 | dout("__register_request %p tid %lld\n", req, req->r_tid); |
471 | ceph_mdsc_get_request(req); | 504 | ceph_mdsc_get_request(req); |
472 | radix_tree_insert(&mdsc->request_tree, req->r_tid, (void *)req); | 505 | __insert_request(mdsc, req); |
473 | 506 | ||
474 | if (dir) { | 507 | if (dir) { |
475 | struct ceph_inode_info *ci = ceph_inode(dir); | 508 | struct ceph_inode_info *ci = ceph_inode(dir); |
@@ -485,7 +518,7 @@ static void __unregister_request(struct ceph_mds_client *mdsc, | |||
485 | struct ceph_mds_request *req) | 518 | struct ceph_mds_request *req) |
486 | { | 519 | { |
487 | dout("__unregister_request %p tid %lld\n", req, req->r_tid); | 520 | dout("__unregister_request %p tid %lld\n", req, req->r_tid); |
488 | radix_tree_delete(&mdsc->request_tree, req->r_tid); | 521 | rb_erase(&req->r_node, &mdsc->request_tree); |
489 | ceph_mdsc_put_request(req); | 522 | ceph_mdsc_put_request(req); |
490 | 523 | ||
491 | if (req->r_unsafe_dir) { | 524 | if (req->r_unsafe_dir) { |
@@ -1115,17 +1148,25 @@ ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode) | |||
1115 | } | 1148 | } |
1116 | 1149 | ||
1117 | /* | 1150 | /* |
1118 | * return oldest (lowest) tid in request tree, 0 if none. | 1151 | * return oldest (lowest) request, tid in request tree, 0 if none. |
1119 | * | 1152 | * |
1120 | * called under mdsc->mutex. | 1153 | * called under mdsc->mutex. |
1121 | */ | 1154 | */ |
1155 | static struct ceph_mds_request *__get_oldest_req(struct ceph_mds_client *mdsc) | ||
1156 | { | ||
1157 | if (RB_EMPTY_ROOT(&mdsc->request_tree)) | ||
1158 | return NULL; | ||
1159 | return rb_entry(rb_first(&mdsc->request_tree), | ||
1160 | struct ceph_mds_request, r_node); | ||
1161 | } | ||
1162 | |||
1122 | static u64 __get_oldest_tid(struct ceph_mds_client *mdsc) | 1163 | static u64 __get_oldest_tid(struct ceph_mds_client *mdsc) |
1123 | { | 1164 | { |
1124 | struct ceph_mds_request *first; | 1165 | struct ceph_mds_request *req = __get_oldest_req(mdsc); |
1125 | if (radix_tree_gang_lookup(&mdsc->request_tree, | 1166 | |
1126 | (void **)&first, 0, 1) <= 0) | 1167 | if (req) |
1127 | return 0; | 1168 | return req->r_tid; |
1128 | return first->r_tid; | 1169 | return 0; |
1129 | } | 1170 | } |
1130 | 1171 | ||
1131 | /* | 1172 | /* |
@@ -1540,26 +1581,19 @@ static void __wake_requests(struct ceph_mds_client *mdsc, | |||
1540 | */ | 1581 | */ |
1541 | static void kick_requests(struct ceph_mds_client *mdsc, int mds, int all) | 1582 | static void kick_requests(struct ceph_mds_client *mdsc, int mds, int all) |
1542 | { | 1583 | { |
1543 | struct ceph_mds_request *reqs[10]; | 1584 | struct ceph_mds_request *req; |
1544 | u64 nexttid = 0; | 1585 | struct rb_node *p; |
1545 | int i, got; | ||
1546 | 1586 | ||
1547 | dout("kick_requests mds%d\n", mds); | 1587 | dout("kick_requests mds%d\n", mds); |
1548 | while (nexttid <= mdsc->last_tid) { | 1588 | for (p = rb_first(&mdsc->request_tree); p; p = rb_next(p)) { |
1549 | got = radix_tree_gang_lookup(&mdsc->request_tree, | 1589 | req = rb_entry(p, struct ceph_mds_request, r_node); |
1550 | (void **)&reqs, nexttid, 10); | 1590 | if (req->r_got_unsafe) |
1551 | if (got == 0) | 1591 | continue; |
1552 | break; | 1592 | if (req->r_session && |
1553 | nexttid = reqs[got-1]->r_tid + 1; | 1593 | req->r_session->s_mds == mds) { |
1554 | for (i = 0; i < got; i++) { | 1594 | dout(" kicking tid %llu\n", req->r_tid); |
1555 | if (reqs[i]->r_got_unsafe) | 1595 | put_request_session(req); |
1556 | continue; | 1596 | __do_request(mdsc, req); |
1557 | if (reqs[i]->r_session && | ||
1558 | reqs[i]->r_session->s_mds == mds) { | ||
1559 | dout(" kicking tid %llu\n", reqs[i]->r_tid); | ||
1560 | put_request_session(reqs[i]); | ||
1561 | __do_request(mdsc, reqs[i]); | ||
1562 | } | ||
1563 | } | 1597 | } |
1564 | } | 1598 | } |
1565 | } | 1599 | } |
@@ -1748,7 +1782,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) | |||
1748 | list_del_init(&req->r_unsafe_item); | 1782 | list_del_init(&req->r_unsafe_item); |
1749 | 1783 | ||
1750 | /* last unsafe request during umount? */ | 1784 | /* last unsafe request during umount? */ |
1751 | if (mdsc->stopping && !__get_oldest_tid(mdsc)) | 1785 | if (mdsc->stopping && !__get_oldest_req(mdsc)) |
1752 | complete(&mdsc->safe_umount_waiters); | 1786 | complete(&mdsc->safe_umount_waiters); |
1753 | mutex_unlock(&mdsc->mutex); | 1787 | mutex_unlock(&mdsc->mutex); |
1754 | goto out; | 1788 | goto out; |
@@ -2573,7 +2607,7 @@ int ceph_mdsc_init(struct ceph_mds_client *mdsc, struct ceph_client *client) | |||
2573 | INIT_LIST_HEAD(&mdsc->snap_empty); | 2607 | INIT_LIST_HEAD(&mdsc->snap_empty); |
2574 | spin_lock_init(&mdsc->snap_empty_lock); | 2608 | spin_lock_init(&mdsc->snap_empty_lock); |
2575 | mdsc->last_tid = 0; | 2609 | mdsc->last_tid = 0; |
2576 | INIT_RADIX_TREE(&mdsc->request_tree, GFP_NOFS); | 2610 | mdsc->request_tree = RB_ROOT; |
2577 | INIT_DELAYED_WORK(&mdsc->delayed_work, delayed_work); | 2611 | INIT_DELAYED_WORK(&mdsc->delayed_work, delayed_work); |
2578 | mdsc->last_renew_caps = jiffies; | 2612 | mdsc->last_renew_caps = jiffies; |
2579 | INIT_LIST_HEAD(&mdsc->cap_delay_list); | 2613 | INIT_LIST_HEAD(&mdsc->cap_delay_list); |
@@ -2600,20 +2634,19 @@ static void wait_requests(struct ceph_mds_client *mdsc) | |||
2600 | struct ceph_client *client = mdsc->client; | 2634 | struct ceph_client *client = mdsc->client; |
2601 | 2635 | ||
2602 | mutex_lock(&mdsc->mutex); | 2636 | mutex_lock(&mdsc->mutex); |
2603 | if (__get_oldest_tid(mdsc)) { | 2637 | if (__get_oldest_req(mdsc)) { |
2604 | mutex_unlock(&mdsc->mutex); | 2638 | mutex_unlock(&mdsc->mutex); |
2639 | |||
2605 | dout("wait_requests waiting for requests\n"); | 2640 | dout("wait_requests waiting for requests\n"); |
2606 | wait_for_completion_timeout(&mdsc->safe_umount_waiters, | 2641 | wait_for_completion_timeout(&mdsc->safe_umount_waiters, |
2607 | client->mount_args->mount_timeout * HZ); | 2642 | client->mount_args->mount_timeout * HZ); |
2608 | mutex_lock(&mdsc->mutex); | ||
2609 | 2643 | ||
2610 | /* tear down remaining requests */ | 2644 | /* tear down remaining requests */ |
2611 | while (radix_tree_gang_lookup(&mdsc->request_tree, | 2645 | mutex_lock(&mdsc->mutex); |
2612 | (void **)&req, 0, 1)) { | 2646 | while ((req = __get_oldest_req(mdsc))) { |
2613 | dout("wait_requests timed out on tid %llu\n", | 2647 | dout("wait_requests timed out on tid %llu\n", |
2614 | req->r_tid); | 2648 | req->r_tid); |
2615 | radix_tree_delete(&mdsc->request_tree, req->r_tid); | 2649 | __unregister_request(mdsc, req); |
2616 | ceph_mdsc_put_request(req); | ||
2617 | } | 2650 | } |
2618 | } | 2651 | } |
2619 | mutex_unlock(&mdsc->mutex); | 2652 | mutex_unlock(&mdsc->mutex); |
@@ -2639,31 +2672,29 @@ void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc) | |||
2639 | */ | 2672 | */ |
2640 | static void wait_unsafe_requests(struct ceph_mds_client *mdsc, u64 want_tid) | 2673 | static void wait_unsafe_requests(struct ceph_mds_client *mdsc, u64 want_tid) |
2641 | { | 2674 | { |
2642 | struct ceph_mds_request *req; | 2675 | struct ceph_mds_request *req = NULL; |
2643 | u64 next_tid = 0; | 2676 | struct rb_node *n; |
2644 | int got; | ||
2645 | 2677 | ||
2646 | mutex_lock(&mdsc->mutex); | 2678 | mutex_lock(&mdsc->mutex); |
2647 | dout("wait_unsafe_requests want %lld\n", want_tid); | 2679 | dout("wait_unsafe_requests want %lld\n", want_tid); |
2648 | while (1) { | 2680 | req = __get_oldest_req(mdsc); |
2649 | got = radix_tree_gang_lookup(&mdsc->request_tree, (void **)&req, | 2681 | while (req && req->r_tid <= want_tid) { |
2650 | next_tid, 1); | 2682 | if ((req->r_op & CEPH_MDS_OP_WRITE)) { |
2651 | if (!got) | 2683 | /* write op */ |
2652 | break; | 2684 | ceph_mdsc_get_request(req); |
2653 | if (req->r_tid > want_tid) | 2685 | mutex_unlock(&mdsc->mutex); |
2686 | dout("wait_unsafe_requests wait on %llu (want %llu)\n", | ||
2687 | req->r_tid, want_tid); | ||
2688 | wait_for_completion(&req->r_safe_completion); | ||
2689 | mutex_lock(&mdsc->mutex); | ||
2690 | n = rb_next(&req->r_node); | ||
2691 | ceph_mdsc_put_request(req); | ||
2692 | } else { | ||
2693 | n = rb_next(&req->r_node); | ||
2694 | } | ||
2695 | if (!n) | ||
2654 | break; | 2696 | break; |
2655 | 2697 | req = rb_entry(n, struct ceph_mds_request, r_node); | |
2656 | next_tid = req->r_tid + 1; | ||
2657 | if ((req->r_op & CEPH_MDS_OP_WRITE) == 0) | ||
2658 | continue; /* not a write op */ | ||
2659 | |||
2660 | ceph_mdsc_get_request(req); | ||
2661 | mutex_unlock(&mdsc->mutex); | ||
2662 | dout("wait_unsafe_requests wait on %llu (want %llu)\n", | ||
2663 | req->r_tid, want_tid); | ||
2664 | wait_for_completion(&req->r_safe_completion); | ||
2665 | mutex_lock(&mdsc->mutex); | ||
2666 | ceph_mdsc_put_request(req); | ||
2667 | } | 2698 | } |
2668 | mutex_unlock(&mdsc->mutex); | 2699 | mutex_unlock(&mdsc->mutex); |
2669 | dout("wait_unsafe_requests done\n"); | 2700 | dout("wait_unsafe_requests done\n"); |
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index ee71495e27c4..98f09cd06006 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h | |||
@@ -6,6 +6,7 @@ | |||
6 | #include <linux/list.h> | 6 | #include <linux/list.h> |
7 | #include <linux/mutex.h> | 7 | #include <linux/mutex.h> |
8 | #include <linux/radix-tree.h> | 8 | #include <linux/radix-tree.h> |
9 | #include <linux/rbtree.h> | ||
9 | #include <linux/spinlock.h> | 10 | #include <linux/spinlock.h> |
10 | 11 | ||
11 | #include "types.h" | 12 | #include "types.h" |
@@ -150,6 +151,7 @@ typedef void (*ceph_mds_request_callback_t) (struct ceph_mds_client *mdsc, | |||
150 | */ | 151 | */ |
151 | struct ceph_mds_request { | 152 | struct ceph_mds_request { |
152 | u64 r_tid; /* transaction id */ | 153 | u64 r_tid; /* transaction id */ |
154 | struct rb_node r_node; | ||
153 | 155 | ||
154 | int r_op; /* mds op code */ | 156 | int r_op; /* mds op code */ |
155 | int r_mds; | 157 | int r_mds; |
@@ -249,7 +251,7 @@ struct ceph_mds_client { | |||
249 | spinlock_t snap_empty_lock; /* protect snap_empty */ | 251 | spinlock_t snap_empty_lock; /* protect snap_empty */ |
250 | 252 | ||
251 | u64 last_tid; /* most recent mds request */ | 253 | u64 last_tid; /* most recent mds request */ |
252 | struct radix_tree_root request_tree; /* pending mds requests */ | 254 | struct rb_root request_tree; /* pending mds requests */ |
253 | struct delayed_work delayed_work; /* delayed work */ | 255 | struct delayed_work delayed_work; /* delayed work */ |
254 | unsigned long last_renew_caps; /* last time we renewed our caps */ | 256 | unsigned long last_renew_caps; /* last time we renewed our caps */ |
255 | struct list_head cap_delay_list; /* caps with delayed release */ | 257 | struct list_head cap_delay_list; /* caps with delayed release */ |