about | summary | refs | log | tree | commit | diff | stats
path: root/fs/ceph
diff options
context:
space:
mode:
author: Sage Weil <sage@newdream.net> 2010-02-15 15:08:46 -0500
committer: Sage Weil <sage@newdream.net> 2010-02-17 01:01:08 -0500
commit: 44ca18f2682eb1cfbed153849adedb79e3e19790 (patch)
tree: 063dd8382179e65717ec10dc1ffc135950050abd /fs/ceph
parent: 91e45ce38946a8efa21fefbc65d023ca3c0b434f (diff)
ceph: use rbtree for mds requests
The rbtree is a more appropriate data structure than a radix_tree. It avoids extra memory usage and simplifies the code.

It also fixes a bug where the debugfs 'mdsc' file wasn't including the most recent mds request.

Signed-off-by: Sage Weil <sage@newdream.net>
Diffstat (limited to 'fs/ceph')
-rw-r--r-- fs/ceph/debugfs.c 13
-rw-r--r-- fs/ceph/mds_client.c 149
-rw-r--r-- fs/ceph/mds_client.h 4
3 files changed, 97 insertions, 69 deletions
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index fba44b2a6086..cd5dd805e4be 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -142,21 +142,16 @@ static int monc_show(struct seq_file *s, void *p)
142static int mdsc_show(struct seq_file *s, void *p) 142static int mdsc_show(struct seq_file *s, void *p)
143{ 143{
144 struct ceph_client *client = s->private; 144 struct ceph_client *client = s->private;
145 struct ceph_mds_request *req;
146 u64 nexttid = 0;
147 int got;
148 struct ceph_mds_client *mdsc = &client->mdsc; 145 struct ceph_mds_client *mdsc = &client->mdsc;
146 struct ceph_mds_request *req;
147 struct rb_node *rp;
149 int pathlen; 148 int pathlen;
150 u64 pathbase; 149 u64 pathbase;
151 char *path; 150 char *path;
152 151
153 mutex_lock(&mdsc->mutex); 152 mutex_lock(&mdsc->mutex);
154 while (nexttid < mdsc->last_tid) { 153 for (rp = rb_first(&mdsc->request_tree); rp; rp = rb_next(rp)) {
155 got = radix_tree_gang_lookup(&mdsc->request_tree, 154 req = rb_entry(rp, struct ceph_mds_request, r_node);
156 (void **)&req, nexttid, 1);
157 if (got == 0)
158 break;
159 nexttid = req->r_tid + 1;
160 155
161 if (req->r_request) 156 if (req->r_request)
162 seq_printf(s, "%lld\tmds%d\t", req->r_tid, req->r_mds); 157 seq_printf(s, "%lld\tmds%d\t", req->r_tid, req->r_mds);
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index aa8506bad42d..81840d6b68a4 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -255,6 +255,7 @@ static const char *session_state_name(int s)
255 case CEPH_MDS_SESSION_OPEN: return "open"; 255 case CEPH_MDS_SESSION_OPEN: return "open";
256 case CEPH_MDS_SESSION_HUNG: return "hung"; 256 case CEPH_MDS_SESSION_HUNG: return "hung";
257 case CEPH_MDS_SESSION_CLOSING: return "closing"; 257 case CEPH_MDS_SESSION_CLOSING: return "closing";
258 case CEPH_MDS_SESSION_RESTARTING: return "restarting";
258 case CEPH_MDS_SESSION_RECONNECTING: return "reconnecting"; 259 case CEPH_MDS_SESSION_RECONNECTING: return "reconnecting";
259 default: return "???"; 260 default: return "???";
260 } 261 }
@@ -448,10 +449,42 @@ static struct ceph_mds_request *__lookup_request(struct ceph_mds_client *mdsc,
448 u64 tid) 449 u64 tid)
449{ 450{
450 struct ceph_mds_request *req; 451 struct ceph_mds_request *req;
451 req = radix_tree_lookup(&mdsc->request_tree, tid); 452 struct rb_node *n = mdsc->request_tree.rb_node;
452 if (req) 453
453 ceph_mdsc_get_request(req); 454 while (n) {
454 return req; 455 req = rb_entry(n, struct ceph_mds_request, r_node);
456 if (tid < req->r_tid)
457 n = n->rb_left;
458 else if (tid > req->r_tid)
459 n = n->rb_right;
460 else {
461 ceph_mdsc_get_request(req);
462 return req;
463 }
464 }
465 return NULL;
466}
467
468static void __insert_request(struct ceph_mds_client *mdsc,
469 struct ceph_mds_request *new)
470{
471 struct rb_node **p = &mdsc->request_tree.rb_node;
472 struct rb_node *parent = NULL;
473 struct ceph_mds_request *req = NULL;
474
475 while (*p) {
476 parent = *p;
477 req = rb_entry(parent, struct ceph_mds_request, r_node);
478 if (new->r_tid < req->r_tid)
479 p = &(*p)->rb_left;
480 else if (new->r_tid > req->r_tid)
481 p = &(*p)->rb_right;
482 else
483 BUG();
484 }
485
486 rb_link_node(&new->r_node, parent, p);
487 rb_insert_color(&new->r_node, &mdsc->request_tree);
455} 488}
456 489
457/* 490/*
@@ -469,7 +502,7 @@ static void __register_request(struct ceph_mds_client *mdsc,
469 ceph_reserve_caps(&req->r_caps_reservation, req->r_num_caps); 502 ceph_reserve_caps(&req->r_caps_reservation, req->r_num_caps);
470 dout("__register_request %p tid %lld\n", req, req->r_tid); 503 dout("__register_request %p tid %lld\n", req, req->r_tid);
471 ceph_mdsc_get_request(req); 504 ceph_mdsc_get_request(req);
472 radix_tree_insert(&mdsc->request_tree, req->r_tid, (void *)req); 505 __insert_request(mdsc, req);
473 506
474 if (dir) { 507 if (dir) {
475 struct ceph_inode_info *ci = ceph_inode(dir); 508 struct ceph_inode_info *ci = ceph_inode(dir);
@@ -485,7 +518,7 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
485 struct ceph_mds_request *req) 518 struct ceph_mds_request *req)
486{ 519{
487 dout("__unregister_request %p tid %lld\n", req, req->r_tid); 520 dout("__unregister_request %p tid %lld\n", req, req->r_tid);
488 radix_tree_delete(&mdsc->request_tree, req->r_tid); 521 rb_erase(&req->r_node, &mdsc->request_tree);
489 ceph_mdsc_put_request(req); 522 ceph_mdsc_put_request(req);
490 523
491 if (req->r_unsafe_dir) { 524 if (req->r_unsafe_dir) {
@@ -1115,17 +1148,25 @@ ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode)
1115} 1148}
1116 1149
1117/* 1150/*
1118 * return oldest (lowest) tid in request tree, 0 if none. 1151 * return oldest (lowest) request, tid in request tree, 0 if none.
1119 * 1152 *
1120 * called under mdsc->mutex. 1153 * called under mdsc->mutex.
1121 */ 1154 */
1155static struct ceph_mds_request *__get_oldest_req(struct ceph_mds_client *mdsc)
1156{
1157 if (RB_EMPTY_ROOT(&mdsc->request_tree))
1158 return NULL;
1159 return rb_entry(rb_first(&mdsc->request_tree),
1160 struct ceph_mds_request, r_node);
1161}
1162
1122static u64 __get_oldest_tid(struct ceph_mds_client *mdsc) 1163static u64 __get_oldest_tid(struct ceph_mds_client *mdsc)
1123{ 1164{
1124 struct ceph_mds_request *first; 1165 struct ceph_mds_request *req = __get_oldest_req(mdsc);
1125 if (radix_tree_gang_lookup(&mdsc->request_tree, 1166
1126 (void **)&first, 0, 1) <= 0) 1167 if (req)
1127 return 0; 1168 return req->r_tid;
1128 return first->r_tid; 1169 return 0;
1129} 1170}
1130 1171
1131/* 1172/*
@@ -1540,26 +1581,19 @@ static void __wake_requests(struct ceph_mds_client *mdsc,
1540 */ 1581 */
1541static void kick_requests(struct ceph_mds_client *mdsc, int mds, int all) 1582static void kick_requests(struct ceph_mds_client *mdsc, int mds, int all)
1542{ 1583{
1543 struct ceph_mds_request *reqs[10]; 1584 struct ceph_mds_request *req;
1544 u64 nexttid = 0; 1585 struct rb_node *p;
1545 int i, got;
1546 1586
1547 dout("kick_requests mds%d\n", mds); 1587 dout("kick_requests mds%d\n", mds);
1548 while (nexttid <= mdsc->last_tid) { 1588 for (p = rb_first(&mdsc->request_tree); p; p = rb_next(p)) {
1549 got = radix_tree_gang_lookup(&mdsc->request_tree, 1589 req = rb_entry(p, struct ceph_mds_request, r_node);
1550 (void **)&reqs, nexttid, 10); 1590 if (req->r_got_unsafe)
1551 if (got == 0) 1591 continue;
1552 break; 1592 if (req->r_session &&
1553 nexttid = reqs[got-1]->r_tid + 1; 1593 req->r_session->s_mds == mds) {
1554 for (i = 0; i < got; i++) { 1594 dout(" kicking tid %llu\n", req->r_tid);
1555 if (reqs[i]->r_got_unsafe) 1595 put_request_session(req);
1556 continue; 1596 __do_request(mdsc, req);
1557 if (reqs[i]->r_session &&
1558 reqs[i]->r_session->s_mds == mds) {
1559 dout(" kicking tid %llu\n", reqs[i]->r_tid);
1560 put_request_session(reqs[i]);
1561 __do_request(mdsc, reqs[i]);
1562 }
1563 } 1597 }
1564 } 1598 }
1565} 1599}
@@ -1748,7 +1782,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
1748 list_del_init(&req->r_unsafe_item); 1782 list_del_init(&req->r_unsafe_item);
1749 1783
1750 /* last unsafe request during umount? */ 1784 /* last unsafe request during umount? */
1751 if (mdsc->stopping && !__get_oldest_tid(mdsc)) 1785 if (mdsc->stopping && !__get_oldest_req(mdsc))
1752 complete(&mdsc->safe_umount_waiters); 1786 complete(&mdsc->safe_umount_waiters);
1753 mutex_unlock(&mdsc->mutex); 1787 mutex_unlock(&mdsc->mutex);
1754 goto out; 1788 goto out;
@@ -2573,7 +2607,7 @@ int ceph_mdsc_init(struct ceph_mds_client *mdsc, struct ceph_client *client)
2573 INIT_LIST_HEAD(&mdsc->snap_empty); 2607 INIT_LIST_HEAD(&mdsc->snap_empty);
2574 spin_lock_init(&mdsc->snap_empty_lock); 2608 spin_lock_init(&mdsc->snap_empty_lock);
2575 mdsc->last_tid = 0; 2609 mdsc->last_tid = 0;
2576 INIT_RADIX_TREE(&mdsc->request_tree, GFP_NOFS); 2610 mdsc->request_tree = RB_ROOT;
2577 INIT_DELAYED_WORK(&mdsc->delayed_work, delayed_work); 2611 INIT_DELAYED_WORK(&mdsc->delayed_work, delayed_work);
2578 mdsc->last_renew_caps = jiffies; 2612 mdsc->last_renew_caps = jiffies;
2579 INIT_LIST_HEAD(&mdsc->cap_delay_list); 2613 INIT_LIST_HEAD(&mdsc->cap_delay_list);
@@ -2600,20 +2634,19 @@ static void wait_requests(struct ceph_mds_client *mdsc)
2600 struct ceph_client *client = mdsc->client; 2634 struct ceph_client *client = mdsc->client;
2601 2635
2602 mutex_lock(&mdsc->mutex); 2636 mutex_lock(&mdsc->mutex);
2603 if (__get_oldest_tid(mdsc)) { 2637 if (__get_oldest_req(mdsc)) {
2604 mutex_unlock(&mdsc->mutex); 2638 mutex_unlock(&mdsc->mutex);
2639
2605 dout("wait_requests waiting for requests\n"); 2640 dout("wait_requests waiting for requests\n");
2606 wait_for_completion_timeout(&mdsc->safe_umount_waiters, 2641 wait_for_completion_timeout(&mdsc->safe_umount_waiters,
2607 client->mount_args->mount_timeout * HZ); 2642 client->mount_args->mount_timeout * HZ);
2608 mutex_lock(&mdsc->mutex);
2609 2643
2610 /* tear down remaining requests */ 2644 /* tear down remaining requests */
2611 while (radix_tree_gang_lookup(&mdsc->request_tree, 2645 mutex_lock(&mdsc->mutex);
2612 (void **)&req, 0, 1)) { 2646 while ((req = __get_oldest_req(mdsc))) {
2613 dout("wait_requests timed out on tid %llu\n", 2647 dout("wait_requests timed out on tid %llu\n",
2614 req->r_tid); 2648 req->r_tid);
2615 radix_tree_delete(&mdsc->request_tree, req->r_tid); 2649 __unregister_request(mdsc, req);
2616 ceph_mdsc_put_request(req);
2617 } 2650 }
2618 } 2651 }
2619 mutex_unlock(&mdsc->mutex); 2652 mutex_unlock(&mdsc->mutex);
@@ -2639,31 +2672,29 @@ void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc)
2639 */ 2672 */
2640static void wait_unsafe_requests(struct ceph_mds_client *mdsc, u64 want_tid) 2673static void wait_unsafe_requests(struct ceph_mds_client *mdsc, u64 want_tid)
2641{ 2674{
2642 struct ceph_mds_request *req; 2675 struct ceph_mds_request *req = NULL;
2643 u64 next_tid = 0; 2676 struct rb_node *n;
2644 int got;
2645 2677
2646 mutex_lock(&mdsc->mutex); 2678 mutex_lock(&mdsc->mutex);
2647 dout("wait_unsafe_requests want %lld\n", want_tid); 2679 dout("wait_unsafe_requests want %lld\n", want_tid);
2648 while (1) { 2680 req = __get_oldest_req(mdsc);
2649 got = radix_tree_gang_lookup(&mdsc->request_tree, (void **)&req, 2681 while (req && req->r_tid <= want_tid) {
2650 next_tid, 1); 2682 if ((req->r_op & CEPH_MDS_OP_WRITE)) {
2651 if (!got) 2683 /* write op */
2652 break; 2684 ceph_mdsc_get_request(req);
2653 if (req->r_tid > want_tid) 2685 mutex_unlock(&mdsc->mutex);
2686 dout("wait_unsafe_requests wait on %llu (want %llu)\n",
2687 req->r_tid, want_tid);
2688 wait_for_completion(&req->r_safe_completion);
2689 mutex_lock(&mdsc->mutex);
2690 n = rb_next(&req->r_node);
2691 ceph_mdsc_put_request(req);
2692 } else {
2693 n = rb_next(&req->r_node);
2694 }
2695 if (!n)
2654 break; 2696 break;
2655 2697 req = rb_entry(n, struct ceph_mds_request, r_node);
2656 next_tid = req->r_tid + 1;
2657 if ((req->r_op & CEPH_MDS_OP_WRITE) == 0)
2658 continue; /* not a write op */
2659
2660 ceph_mdsc_get_request(req);
2661 mutex_unlock(&mdsc->mutex);
2662 dout("wait_unsafe_requests wait on %llu (want %llu)\n",
2663 req->r_tid, want_tid);
2664 wait_for_completion(&req->r_safe_completion);
2665 mutex_lock(&mdsc->mutex);
2666 ceph_mdsc_put_request(req);
2667 } 2698 }
2668 mutex_unlock(&mdsc->mutex); 2699 mutex_unlock(&mdsc->mutex);
2669 dout("wait_unsafe_requests done\n"); 2700 dout("wait_unsafe_requests done\n");
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index ee71495e27c4..98f09cd06006 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -6,6 +6,7 @@
6#include <linux/list.h> 6#include <linux/list.h>
7#include <linux/mutex.h> 7#include <linux/mutex.h>
8#include <linux/radix-tree.h> 8#include <linux/radix-tree.h>
9#include <linux/rbtree.h>
9#include <linux/spinlock.h> 10#include <linux/spinlock.h>
10 11
11#include "types.h" 12#include "types.h"
@@ -150,6 +151,7 @@ typedef void (*ceph_mds_request_callback_t) (struct ceph_mds_client *mdsc,
150 */ 151 */
151struct ceph_mds_request { 152struct ceph_mds_request {
152 u64 r_tid; /* transaction id */ 153 u64 r_tid; /* transaction id */
154 struct rb_node r_node;
153 155
154 int r_op; /* mds op code */ 156 int r_op; /* mds op code */
155 int r_mds; 157 int r_mds;
@@ -249,7 +251,7 @@ struct ceph_mds_client {
249 spinlock_t snap_empty_lock; /* protect snap_empty */ 251 spinlock_t snap_empty_lock; /* protect snap_empty */
250 252
251 u64 last_tid; /* most recent mds request */ 253 u64 last_tid; /* most recent mds request */
252 struct radix_tree_root request_tree; /* pending mds requests */ 254 struct rb_root request_tree; /* pending mds requests */
253 struct delayed_work delayed_work; /* delayed work */ 255 struct delayed_work delayed_work; /* delayed work */
254 unsigned long last_renew_caps; /* last time we renewed our caps */ 256 unsigned long last_renew_caps; /* last time we renewed our caps */
255 struct list_head cap_delay_list; /* caps with delayed release */ 257 struct list_head cap_delay_list; /* caps with delayed release */