svcrdma: Change WR context get/put to use the kmem cache

Change the WR context pool to be shared across mount points. This reduces the RDMA transport memory footprint significantly, since idle mounts don't consume WR context memory.

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
author: Tom Tucker <tom@opengridcomputing.com> 2008-05-28 16:14:02 -0400
committer: Tom Tucker <tom@opengridcomputing.com> 2008-07-02 16:02:02 -0400
commit: 8948896c9e098c6fd31a6a698a598a7cbd7fa40e (patch)
tree: 465a4d93c8becb0b2e4750cc1286391916f887b5
parent: bf5927d84e70d522f234ca247b27d27c63878b93 (diff)
2 files changed, 12 insertions, 115 deletions
diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index d8d74c4ab504..ef2e3a20bf3b 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -73,7 +73,6 @@ extern atomic_t rdma_stat_sq_prod;
 struct svc_rdma_op_ctxt {
        struct svc_rdma_op_ctxt *read_hdr;
        int hdr_count;
-        struct list_head free_list;
        struct xdr_buf arg;
        struct list_head dto_q;
        enum ib_wr_opcode wr_op;
@@ -131,11 +130,6 @@ struct svcxprt_rdma {
        atomic_t             sc_dma_used;
        atomic_t             sc_ctxt_used;
-        struct list_head     sc_ctxt_free;
-        int                  sc_ctxt_cnt;
-        int                  sc_ctxt_bump;
-        int                  sc_ctxt_max;
-        spinlock_t           sc_ctxt_lock;
        struct list_head     sc_rq_dto_q;
        spinlock_t           sc_rq_dto_lock;
        struct ib_qp         *sc_qp;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 80104f4999d5..19ddc382b777 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -84,69 +84,23 @@ struct svc_xprt_class svc_rdma_class = {
        .xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP,
 };
-static int rdma_bump_context_cache(struct svcxprt_rdma *xprt)
+/* WR context cache. Created in svc_rdma.c  */
-{
+extern struct kmem_cache *svc_rdma_ctxt_cachep;
-        int target;
-        int at_least_one = 0;
-        struct svc_rdma_op_ctxt *ctxt;
-        target = min(xprt->sc_ctxt_cnt + xprt->sc_ctxt_bump,
-                     xprt->sc_ctxt_max);
-        spin_lock_bh(&xprt->sc_ctxt_lock);
-        while (xprt->sc_ctxt_cnt < target) {
-                xprt->sc_ctxt_cnt++;
-                spin_unlock_bh(&xprt->sc_ctxt_lock);
-                ctxt = kmalloc(sizeof(*ctxt), GFP_KERNEL);
-                spin_lock_bh(&xprt->sc_ctxt_lock);
-                if (ctxt) {
-                        at_least_one = 1;
-                        INIT_LIST_HEAD(&ctxt->free_list);
-                        list_add(&ctxt->free_list, &xprt->sc_ctxt_free);
-                } else {
-                        /* kmalloc failed...give up for now */
-                        xprt->sc_ctxt_cnt--;
-                        break;
-                }
-        }
-        spin_unlock_bh(&xprt->sc_ctxt_lock);
-        dprintk("svcrdma: sc_ctxt_max=%d, sc_ctxt_cnt=%d\n",
-                xprt->sc_ctxt_max, xprt->sc_ctxt_cnt);
-        return at_least_one;
-}
 struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
 {
        struct svc_rdma_op_ctxt *ctxt;
        while (1) {
-                spin_lock_bh(&xprt->sc_ctxt_lock);
+                ctxt = kmem_cache_alloc(svc_rdma_ctxt_cachep, GFP_KERNEL);
-                if (unlikely(list_empty(&xprt->sc_ctxt_free))) {
+                if (ctxt)
-                        /* Try to bump my cache. */
+                        break;
-                        spin_unlock_bh(&xprt->sc_ctxt_lock);
+                schedule_timeout_uninterruptible(msecs_to_jiffies(500));
-                        if (rdma_bump_context_cache(xprt))
-                                continue;
-                        printk(KERN_INFO "svcrdma: sleeping waiting for "
-                               "context memory on xprt=%p\n",
-                               xprt);
-                        schedule_timeout_uninterruptible(msecs_to_jiffies(500));
-                        continue;
-                }
-                ctxt = list_entry(xprt->sc_ctxt_free.next,
-                                  struct svc_rdma_op_ctxt,
-                                  free_list);
-                list_del_init(&ctxt->free_list);
-                spin_unlock_bh(&xprt->sc_ctxt_lock);
-                ctxt->xprt = xprt;
-                INIT_LIST_HEAD(&ctxt->dto_q);
-                ctxt->count = 0;
-                atomic_inc(&xprt->sc_ctxt_used);
-                break;
        }
+        ctxt->xprt = xprt;
+        INIT_LIST_HEAD(&ctxt->dto_q);
+        ctxt->count = 0;
+        atomic_inc(&xprt->sc_ctxt_used);
        return ctxt;
 }
@@ -174,9 +128,7 @@ void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages)
                for (i = 0; i < ctxt->count; i++)
                        put_page(ctxt->pages[i]);
-        spin_lock_bh(&xprt->sc_ctxt_lock);
+        kmem_cache_free(svc_rdma_ctxt_cachep, ctxt);
-        list_add(&ctxt->free_list, &xprt->sc_ctxt_free);
-        spin_unlock_bh(&xprt->sc_ctxt_lock);
        atomic_dec(&xprt->sc_ctxt_used);
 }
@@ -461,40 +413,6 @@ static void sq_comp_handler(struct ib_cq *cq, void *cq_context)
        tasklet_schedule(&dto_tasklet);
 }
-static void create_context_cache(struct svcxprt_rdma *xprt,
-                                 int ctxt_count, int ctxt_bump, int ctxt_max)
-{
-        struct svc_rdma_op_ctxt *ctxt;
-        int i;
-        xprt->sc_ctxt_max = ctxt_max;
-        xprt->sc_ctxt_bump = ctxt_bump;
-        xprt->sc_ctxt_cnt = 0;
-        atomic_set(&xprt->sc_ctxt_used, 0);
-        INIT_LIST_HEAD(&xprt->sc_ctxt_free);
-        for (i = 0; i < ctxt_count; i++) {
-                ctxt = kmalloc(sizeof(*ctxt), GFP_KERNEL);
-                if (ctxt) {
-                        INIT_LIST_HEAD(&ctxt->free_list);
-                        list_add(&ctxt->free_list, &xprt->sc_ctxt_free);
-                        xprt->sc_ctxt_cnt++;
-                }
-        }
-}
-static void destroy_context_cache(struct svcxprt_rdma *xprt)
-{
-        while (!list_empty(&xprt->sc_ctxt_free)) {
-                struct svc_rdma_op_ctxt *ctxt;
-                ctxt = list_entry(xprt->sc_ctxt_free.next,
-                                  struct svc_rdma_op_ctxt,
-                                  free_list);
-                list_del_init(&ctxt->free_list);
-                kfree(ctxt);
-        }
-}
 static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
                                             int listener)
 {
@@ -511,7 +429,6 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
        spin_lock_init(&cma_xprt->sc_lock);
        spin_lock_init(&cma_xprt->sc_read_complete_lock);
-        spin_lock_init(&cma_xprt->sc_ctxt_lock);
        spin_lock_init(&cma_xprt->sc_rq_dto_lock);
        cma_xprt->sc_ord = svcrdma_ord;
@@ -522,20 +439,7 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
        atomic_set(&cma_xprt->sc_sq_count, 0);
        atomic_set(&cma_xprt->sc_ctxt_used, 0);
-        if (!listener) {
+        if (listener)
-                int reqs = cma_xprt->sc_max_requests;
-                create_context_cache(cma_xprt,
-                                     reqs << 1, /* starting size */
-                                     reqs,      /* bump amount */
-                                     reqs +
-                                     cma_xprt->sc_sq_depth +
-                                     RPCRDMA_MAX_THREADS + 1); /* max */
-                if (list_empty(&cma_xprt->sc_ctxt_free)) {
-                        kfree(cma_xprt);
-                        return NULL;
-                }
-                clear_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags);
-        } else
                set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags);
        return cma_xprt;
@@ -1077,7 +981,6 @@ static void __svc_rdma_free(struct work_struct *work)
        /* Destroy the CM ID */
        rdma_destroy_id(rdma->sc_cm_id);
-        destroy_context_cache(rdma);
        kfree(rdma);
 }
author	Tom Tucker <tom@opengridcomputing.com>	2008-05-28 16:14:02 -0400
committer	Tom Tucker <tom@opengridcomputing.com>	2008-07-02 16:02:02 -0400
commit	8948896c9e098c6fd31a6a698a598a7cbd7fa40e (patch)
tree	465a4d93c8becb0b2e4750cc1286391916f887b5
parent	bf5927d84e70d522f234ca247b27d27c63878b93 (diff)

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index d8d74c4ab504..ef2e3a20bf3b 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h
@@ -73,7 +73,6 @@ extern atomic_t rdma_stat_sq_prod;
73	struct svc_rdma_op_ctxt {	73	struct svc_rdma_op_ctxt {
74	struct svc_rdma_op_ctxt *read_hdr;	74	struct svc_rdma_op_ctxt *read_hdr;
75	int hdr_count;	75	int hdr_count;
76	struct list_head free_list;
77	struct xdr_buf arg;	76	struct xdr_buf arg;
78	struct list_head dto_q;	77	struct list_head dto_q;
79	enum ib_wr_opcode wr_op;	78	enum ib_wr_opcode wr_op;
@@ -131,11 +130,6 @@ struct svcxprt_rdma {
131		130
132	atomic_t sc_dma_used;	131	atomic_t sc_dma_used;
133	atomic_t sc_ctxt_used;	132	atomic_t sc_ctxt_used;
134	struct list_head sc_ctxt_free;
135	int sc_ctxt_cnt;
136	int sc_ctxt_bump;
137	int sc_ctxt_max;
138	spinlock_t sc_ctxt_lock;
139	struct list_head sc_rq_dto_q;	133	struct list_head sc_rq_dto_q;
140	spinlock_t sc_rq_dto_lock;	134	spinlock_t sc_rq_dto_lock;
141	struct ib_qp *sc_qp;	135	struct ib_qp *sc_qp;


diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 80104f4999d5..19ddc382b777 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -84,69 +84,23 @@ struct svc_xprt_class svc_rdma_class = {
84	.xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP,	84	.xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP,
85	};	85	};
86		86
87	static int rdma_bump_context_cache(struct svcxprt_rdma *xprt)	87	/* WR context cache. Created in svc_rdma.c */
88	{	88	extern struct kmem_cache *svc_rdma_ctxt_cachep;
89	int target;
90	int at_least_one = 0;
91	struct svc_rdma_op_ctxt *ctxt;
92
93	target = min(xprt->sc_ctxt_cnt + xprt->sc_ctxt_bump,
94	xprt->sc_ctxt_max);
95
96	spin_lock_bh(&xprt->sc_ctxt_lock);
97	while (xprt->sc_ctxt_cnt < target) {
98	xprt->sc_ctxt_cnt++;
99	spin_unlock_bh(&xprt->sc_ctxt_lock);
100
101	ctxt = kmalloc(sizeof(*ctxt), GFP_KERNEL);
102
103	spin_lock_bh(&xprt->sc_ctxt_lock);
104	if (ctxt) {
105	at_least_one = 1;
106	INIT_LIST_HEAD(&ctxt->free_list);
107	list_add(&ctxt->free_list, &xprt->sc_ctxt_free);
108	} else {
109	/* kmalloc failed...give up for now */
110	xprt->sc_ctxt_cnt--;
111	break;
112	}
113	}
114	spin_unlock_bh(&xprt->sc_ctxt_lock);
115	dprintk("svcrdma: sc_ctxt_max=%d, sc_ctxt_cnt=%d\n",
116	xprt->sc_ctxt_max, xprt->sc_ctxt_cnt);
117	return at_least_one;
118	}
119		89
120	struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)	90	struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
121	{	91	{
122	struct svc_rdma_op_ctxt *ctxt;	92	struct svc_rdma_op_ctxt *ctxt;
123		93
124	while (1) {	94	while (1) {
125	spin_lock_bh(&xprt->sc_ctxt_lock);	95	ctxt = kmem_cache_alloc(svc_rdma_ctxt_cachep, GFP_KERNEL);
126	if (unlikely(list_empty(&xprt->sc_ctxt_free))) {	96	if (ctxt)
127	/* Try to bump my cache. */	97	break;
128	spin_unlock_bh(&xprt->sc_ctxt_lock);	98	schedule_timeout_uninterruptible(msecs_to_jiffies(500));
129
130	if (rdma_bump_context_cache(xprt))
131	continue;
132
133	printk(KERN_INFO "svcrdma: sleeping waiting for "
134	"context memory on xprt=%p\n",
135	xprt);
136	schedule_timeout_uninterruptible(msecs_to_jiffies(500));
137	continue;
138	}
139	ctxt = list_entry(xprt->sc_ctxt_free.next,
140	struct svc_rdma_op_ctxt,
141	free_list);
142	list_del_init(&ctxt->free_list);
143	spin_unlock_bh(&xprt->sc_ctxt_lock);
144	ctxt->xprt = xprt;
145	INIT_LIST_HEAD(&ctxt->dto_q);
146	ctxt->count = 0;
147	atomic_inc(&xprt->sc_ctxt_used);
148	break;
149	}	99	}
		100	ctxt->xprt = xprt;
		101	INIT_LIST_HEAD(&ctxt->dto_q);
		102	ctxt->count = 0;
		103	atomic_inc(&xprt->sc_ctxt_used);
150	return ctxt;	104	return ctxt;
151	}	105	}
152		106
@@ -174,9 +128,7 @@ void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages)
174	for (i = 0; i < ctxt->count; i++)	128	for (i = 0; i < ctxt->count; i++)
175	put_page(ctxt->pages[i]);	129	put_page(ctxt->pages[i]);
176		130
177	spin_lock_bh(&xprt->sc_ctxt_lock);	131	kmem_cache_free(svc_rdma_ctxt_cachep, ctxt);
178	list_add(&ctxt->free_list, &xprt->sc_ctxt_free);
179	spin_unlock_bh(&xprt->sc_ctxt_lock);
180	atomic_dec(&xprt->sc_ctxt_used);	132	atomic_dec(&xprt->sc_ctxt_used);
181	}	133	}
182		134
@@ -461,40 +413,6 @@ static void sq_comp_handler(struct ib_cq *cq, void *cq_context)
461	tasklet_schedule(&dto_tasklet);	413	tasklet_schedule(&dto_tasklet);
462	}	414	}
463		415
464	static void create_context_cache(struct svcxprt_rdma *xprt,
465	int ctxt_count, int ctxt_bump, int ctxt_max)
466	{
467	struct svc_rdma_op_ctxt *ctxt;
468	int i;
469
470	xprt->sc_ctxt_max = ctxt_max;
471	xprt->sc_ctxt_bump = ctxt_bump;
472	xprt->sc_ctxt_cnt = 0;
473	atomic_set(&xprt->sc_ctxt_used, 0);
474
475	INIT_LIST_HEAD(&xprt->sc_ctxt_free);
476	for (i = 0; i < ctxt_count; i++) {
477	ctxt = kmalloc(sizeof(*ctxt), GFP_KERNEL);
478	if (ctxt) {
479	INIT_LIST_HEAD(&ctxt->free_list);
480	list_add(&ctxt->free_list, &xprt->sc_ctxt_free);
481	xprt->sc_ctxt_cnt++;
482	}
483	}
484	}
485
486	static void destroy_context_cache(struct svcxprt_rdma *xprt)
487	{
488	while (!list_empty(&xprt->sc_ctxt_free)) {
489	struct svc_rdma_op_ctxt *ctxt;
490	ctxt = list_entry(xprt->sc_ctxt_free.next,
491	struct svc_rdma_op_ctxt,
492	free_list);
493	list_del_init(&ctxt->free_list);
494	kfree(ctxt);
495	}
496	}
497
498	static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,	416	static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
499	int listener)	417	int listener)
500	{	418	{
@@ -511,7 +429,6 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
511		429
512	spin_lock_init(&cma_xprt->sc_lock);	430	spin_lock_init(&cma_xprt->sc_lock);
513	spin_lock_init(&cma_xprt->sc_read_complete_lock);	431	spin_lock_init(&cma_xprt->sc_read_complete_lock);
514	spin_lock_init(&cma_xprt->sc_ctxt_lock);
515	spin_lock_init(&cma_xprt->sc_rq_dto_lock);	432	spin_lock_init(&cma_xprt->sc_rq_dto_lock);
516		433
517	cma_xprt->sc_ord = svcrdma_ord;	434	cma_xprt->sc_ord = svcrdma_ord;
@@ -522,20 +439,7 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
522	atomic_set(&cma_xprt->sc_sq_count, 0);	439	atomic_set(&cma_xprt->sc_sq_count, 0);
523	atomic_set(&cma_xprt->sc_ctxt_used, 0);	440	atomic_set(&cma_xprt->sc_ctxt_used, 0);
524		441
525	if (!listener) {	442	if (listener)
526	int reqs = cma_xprt->sc_max_requests;
527	create_context_cache(cma_xprt,
528	reqs << 1, /* starting size */
529	reqs, /* bump amount */
530	reqs +
531	cma_xprt->sc_sq_depth +
532	RPCRDMA_MAX_THREADS + 1); /* max */
533	if (list_empty(&cma_xprt->sc_ctxt_free)) {
534	kfree(cma_xprt);
535	return NULL;
536	}
537	clear_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags);
538	} else
539	set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags);	443	set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags);
540		444
541	return cma_xprt;	445	return cma_xprt;
@@ -1077,7 +981,6 @@ static void __svc_rdma_free(struct work_struct *work)
1077	/* Destroy the CM ID */	981	/* Destroy the CM ID */
1078	rdma_destroy_id(rdma->sc_cm_id);	982	rdma_destroy_id(rdma->sc_cm_id);
1079		983
1080	destroy_context_cache(rdma);
1081	kfree(rdma);	984	kfree(rdma);
1082	}	985	}
1083		986