aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChuck Lever <cel@netapp.com>2006-01-03 03:55:49 -0500
committerTrond Myklebust <Trond.Myklebust@netapp.com>2006-01-06 14:58:55 -0500
commit02107148349f31eee7c0fb06fd7a880df73dbd20 (patch)
tree37bffd81e08b8e50394ce89a1aa7a3961f0ffbe7
parent03c21733938aad0758f5f88e1cc7ede69fc3c910 (diff)
SUNRPC: switchable buffer allocation
Add RPC client transport switch support for replacing buffer management on a per-transport basis. In the current IPv4 socket transport implementation, RPC buffers are allocated as needed for each RPC message that is sent. Some transport implementations may choose to use pre-allocated buffers for encoding, sending, receiving, and unmarshalling RPC messages, however. For transports capable of direct data placement, the buffers can be carved out of a pre-registered area of memory rather than from a slab cache. Test-plan: Millions of fsx operations. Performance characterization with "sio" and "iozone". Use oprofile and other tools to look for significant regression in CPU utilization. Signed-off-by: Chuck Lever <cel@netapp.com> Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
-rw-r--r--include/linux/sunrpc/sched.h3
-rw-r--r--include/linux/sunrpc/xprt.h10
-rw-r--r--net/sunrpc/clnt.c14
-rw-r--r--net/sunrpc/sched.c50
-rw-r--r--net/sunrpc/xprt.c3
-rw-r--r--net/sunrpc/xprtsock.c5
6 files changed, 49 insertions(+), 36 deletions(-)
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 94b0afa4ab05..8b25629accd8 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -52,8 +52,6 @@ struct rpc_task {
52 * RPC call state 52 * RPC call state
53 */ 53 */
54 struct rpc_message tk_msg; /* RPC call info */ 54 struct rpc_message tk_msg; /* RPC call info */
55 __u32 * tk_buffer; /* XDR buffer */
56 size_t tk_bufsize;
57 __u8 tk_garb_retry; 55 __u8 tk_garb_retry;
58 __u8 tk_cred_retry; 56 __u8 tk_cred_retry;
59 57
@@ -268,6 +266,7 @@ struct rpc_task *rpc_wake_up_next(struct rpc_wait_queue *);
268void rpc_wake_up_status(struct rpc_wait_queue *, int); 266void rpc_wake_up_status(struct rpc_wait_queue *, int);
269void rpc_delay(struct rpc_task *, unsigned long); 267void rpc_delay(struct rpc_task *, unsigned long);
270void * rpc_malloc(struct rpc_task *, size_t); 268void * rpc_malloc(struct rpc_task *, size_t);
269void rpc_free(struct rpc_task *);
271int rpciod_up(void); 270int rpciod_up(void);
272void rpciod_down(void); 271void rpciod_down(void);
273void rpciod_wake_up(void); 272void rpciod_wake_up(void);
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 3b8b6e823c70..7885b9621ce3 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -79,21 +79,19 @@ struct rpc_rqst {
79 void (*rq_release_snd_buf)(struct rpc_rqst *); /* release rq_enc_pages */ 79 void (*rq_release_snd_buf)(struct rpc_rqst *); /* release rq_enc_pages */
80 struct list_head rq_list; 80 struct list_head rq_list;
81 81
82 __u32 * rq_buffer; /* XDR encode buffer */
83 size_t rq_bufsize;
84
82 struct xdr_buf rq_private_buf; /* The receive buffer 85 struct xdr_buf rq_private_buf; /* The receive buffer
83 * used in the softirq. 86 * used in the softirq.
84 */ 87 */
85 unsigned long rq_majortimeo; /* major timeout alarm */ 88 unsigned long rq_majortimeo; /* major timeout alarm */
86 unsigned long rq_timeout; /* Current timeout value */ 89 unsigned long rq_timeout; /* Current timeout value */
87 unsigned int rq_retries; /* # of retries */ 90 unsigned int rq_retries; /* # of retries */
88 /*
89 * For authentication (e.g. auth_des)
90 */
91 u32 rq_creddata[2];
92 91
93 /* 92 /*
94 * Partial send handling 93 * Partial send handling
95 */ 94 */
96
97 u32 rq_bytes_sent; /* Bytes we have sent */ 95 u32 rq_bytes_sent; /* Bytes we have sent */
98 96
99 unsigned long rq_xtime; /* when transmitted */ 97 unsigned long rq_xtime; /* when transmitted */
@@ -107,6 +105,8 @@ struct rpc_xprt_ops {
107 int (*reserve_xprt)(struct rpc_task *task); 105 int (*reserve_xprt)(struct rpc_task *task);
108 void (*release_xprt)(struct rpc_xprt *xprt, struct rpc_task *task); 106 void (*release_xprt)(struct rpc_xprt *xprt, struct rpc_task *task);
109 void (*connect)(struct rpc_task *task); 107 void (*connect)(struct rpc_task *task);
108 void * (*buf_alloc)(struct rpc_task *task, size_t size);
109 void (*buf_free)(struct rpc_task *task);
110 int (*send_request)(struct rpc_task *task); 110 int (*send_request)(struct rpc_task *task);
111 void (*set_retrans_timeout)(struct rpc_task *task); 111 void (*set_retrans_timeout)(struct rpc_task *task);
112 void (*timer)(struct rpc_task *task); 112 void (*timer)(struct rpc_task *task);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index b23c0d328c9c..25cba94c5683 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -644,24 +644,26 @@ call_reserveresult(struct rpc_task *task)
644 644
645/* 645/*
646 * 2. Allocate the buffer. For details, see sched.c:rpc_malloc. 646 * 2. Allocate the buffer. For details, see sched.c:rpc_malloc.
647 * (Note: buffer memory is freed in rpc_task_release). 647 * (Note: buffer memory is freed in xprt_release).
648 */ 648 */
649static void 649static void
650call_allocate(struct rpc_task *task) 650call_allocate(struct rpc_task *task)
651{ 651{
652 struct rpc_rqst *req = task->tk_rqstp;
653 struct rpc_xprt *xprt = task->tk_xprt;
652 unsigned int bufsiz; 654 unsigned int bufsiz;
653 655
654 dprintk("RPC: %4d call_allocate (status %d)\n", 656 dprintk("RPC: %4d call_allocate (status %d)\n",
655 task->tk_pid, task->tk_status); 657 task->tk_pid, task->tk_status);
656 task->tk_action = call_bind; 658 task->tk_action = call_bind;
657 if (task->tk_buffer) 659 if (req->rq_buffer)
658 return; 660 return;
659 661
660 /* FIXME: compute buffer requirements more exactly using 662 /* FIXME: compute buffer requirements more exactly using
661 * auth->au_wslack */ 663 * auth->au_wslack */
662 bufsiz = task->tk_msg.rpc_proc->p_bufsiz + RPC_SLACK_SPACE; 664 bufsiz = task->tk_msg.rpc_proc->p_bufsiz + RPC_SLACK_SPACE;
663 665
664 if (rpc_malloc(task, bufsiz << 1) != NULL) 666 if (xprt->ops->buf_alloc(task, bufsiz << 1) != NULL)
665 return; 667 return;
666 printk(KERN_INFO "RPC: buffer allocation failed for task %p\n", task); 668 printk(KERN_INFO "RPC: buffer allocation failed for task %p\n", task);
667 669
@@ -704,14 +706,14 @@ call_encode(struct rpc_task *task)
704 task->tk_pid, task->tk_status); 706 task->tk_pid, task->tk_status);
705 707
706 /* Default buffer setup */ 708 /* Default buffer setup */
707 bufsiz = task->tk_bufsize >> 1; 709 bufsiz = req->rq_bufsize >> 1;
708 sndbuf->head[0].iov_base = (void *)task->tk_buffer; 710 sndbuf->head[0].iov_base = (void *)req->rq_buffer;
709 sndbuf->head[0].iov_len = bufsiz; 711 sndbuf->head[0].iov_len = bufsiz;
710 sndbuf->tail[0].iov_len = 0; 712 sndbuf->tail[0].iov_len = 0;
711 sndbuf->page_len = 0; 713 sndbuf->page_len = 0;
712 sndbuf->len = 0; 714 sndbuf->len = 0;
713 sndbuf->buflen = bufsiz; 715 sndbuf->buflen = bufsiz;
714 rcvbuf->head[0].iov_base = (void *)((char *)task->tk_buffer + bufsiz); 716 rcvbuf->head[0].iov_base = (void *)((char *)req->rq_buffer + bufsiz);
715 rcvbuf->head[0].iov_len = bufsiz; 717 rcvbuf->head[0].iov_len = bufsiz;
716 rcvbuf->tail[0].iov_len = 0; 718 rcvbuf->tail[0].iov_len = 0;
717 rcvbuf->page_len = 0; 719 rcvbuf->page_len = 0;
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 48510e3ffa02..7415406aa1ae 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -41,8 +41,6 @@ static mempool_t *rpc_buffer_mempool __read_mostly;
41 41
42static void __rpc_default_timer(struct rpc_task *task); 42static void __rpc_default_timer(struct rpc_task *task);
43static void rpciod_killall(void); 43static void rpciod_killall(void);
44static void rpc_free(struct rpc_task *task);
45
46static void rpc_async_schedule(void *); 44static void rpc_async_schedule(void *);
47 45
48/* 46/*
@@ -599,7 +597,6 @@ void rpc_exit_task(struct rpc_task *task)
599 WARN_ON(RPC_ASSASSINATED(task)); 597 WARN_ON(RPC_ASSASSINATED(task));
600 /* Always release the RPC slot and buffer memory */ 598 /* Always release the RPC slot and buffer memory */
601 xprt_release(task); 599 xprt_release(task);
602 rpc_free(task);
603 } 600 }
604 } 601 }
605} 602}
@@ -724,17 +721,19 @@ static void rpc_async_schedule(void *arg)
724 __rpc_execute((struct rpc_task *)arg); 721 __rpc_execute((struct rpc_task *)arg);
725} 722}
726 723
727/* 724/**
728 * Allocate memory for RPC purposes. 725 * rpc_malloc - allocate an RPC buffer
726 * @task: RPC task that will use this buffer
727 * @size: requested byte size
729 * 728 *
730 * We try to ensure that some NFS reads and writes can always proceed 729 * We try to ensure that some NFS reads and writes can always proceed
731 * by using a mempool when allocating 'small' buffers. 730 * by using a mempool when allocating 'small' buffers.
732 * In order to avoid memory starvation triggering more writebacks of 731 * In order to avoid memory starvation triggering more writebacks of
733 * NFS requests, we use GFP_NOFS rather than GFP_KERNEL. 732 * NFS requests, we use GFP_NOFS rather than GFP_KERNEL.
734 */ 733 */
735void * 734void * rpc_malloc(struct rpc_task *task, size_t size)
736rpc_malloc(struct rpc_task *task, size_t size)
737{ 735{
736 struct rpc_rqst *req = task->tk_rqstp;
738 gfp_t gfp; 737 gfp_t gfp;
739 738
740 if (task->tk_flags & RPC_TASK_SWAPPER) 739 if (task->tk_flags & RPC_TASK_SWAPPER)
@@ -743,27 +742,33 @@ rpc_malloc(struct rpc_task *task, size_t size)
743 gfp = GFP_NOFS; 742 gfp = GFP_NOFS;
744 743
745 if (size > RPC_BUFFER_MAXSIZE) { 744 if (size > RPC_BUFFER_MAXSIZE) {
746 task->tk_buffer = kmalloc(size, gfp); 745 req->rq_buffer = kmalloc(size, gfp);
747 if (task->tk_buffer) 746 if (req->rq_buffer)
748 task->tk_bufsize = size; 747 req->rq_bufsize = size;
749 } else { 748 } else {
750 task->tk_buffer = mempool_alloc(rpc_buffer_mempool, gfp); 749 req->rq_buffer = mempool_alloc(rpc_buffer_mempool, gfp);
751 if (task->tk_buffer) 750 if (req->rq_buffer)
752 task->tk_bufsize = RPC_BUFFER_MAXSIZE; 751 req->rq_bufsize = RPC_BUFFER_MAXSIZE;
753 } 752 }
754 return task->tk_buffer; 753 return req->rq_buffer;
755} 754}
756 755
757static void 756/**
758rpc_free(struct rpc_task *task) 757 * rpc_free - free buffer allocated via rpc_malloc
758 * @task: RPC task with a buffer to be freed
759 *
760 */
761void rpc_free(struct rpc_task *task)
759{ 762{
760 if (task->tk_buffer) { 763 struct rpc_rqst *req = task->tk_rqstp;
761 if (task->tk_bufsize == RPC_BUFFER_MAXSIZE) 764
762 mempool_free(task->tk_buffer, rpc_buffer_mempool); 765 if (req->rq_buffer) {
766 if (req->rq_bufsize == RPC_BUFFER_MAXSIZE)
767 mempool_free(req->rq_buffer, rpc_buffer_mempool);
763 else 768 else
764 kfree(task->tk_buffer); 769 kfree(req->rq_buffer);
765 task->tk_buffer = NULL; 770 req->rq_buffer = NULL;
766 task->tk_bufsize = 0; 771 req->rq_bufsize = 0;
767 } 772 }
768} 773}
769 774
@@ -887,7 +892,6 @@ void rpc_release_task(struct rpc_task *task)
887 xprt_release(task); 892 xprt_release(task);
888 if (task->tk_msg.rpc_cred) 893 if (task->tk_msg.rpc_cred)
889 rpcauth_unbindcred(task); 894 rpcauth_unbindcred(task);
890 rpc_free(task);
891 if (task->tk_client) { 895 if (task->tk_client) {
892 rpc_release_client(task->tk_client); 896 rpc_release_client(task->tk_client);
893 task->tk_client = NULL; 897 task->tk_client = NULL;
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 6dda3860351f..069a6cbd49ea 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -838,6 +838,8 @@ static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt)
838 req->rq_timeout = xprt->timeout.to_initval; 838 req->rq_timeout = xprt->timeout.to_initval;
839 req->rq_task = task; 839 req->rq_task = task;
840 req->rq_xprt = xprt; 840 req->rq_xprt = xprt;
841 req->rq_buffer = NULL;
842 req->rq_bufsize = 0;
841 req->rq_xid = xprt_alloc_xid(xprt); 843 req->rq_xid = xprt_alloc_xid(xprt);
842 req->rq_release_snd_buf = NULL; 844 req->rq_release_snd_buf = NULL;
843 dprintk("RPC: %4d reserved req %p xid %08x\n", task->tk_pid, 845 dprintk("RPC: %4d reserved req %p xid %08x\n", task->tk_pid,
@@ -867,6 +869,7 @@ void xprt_release(struct rpc_task *task)
867 mod_timer(&xprt->timer, 869 mod_timer(&xprt->timer,
868 xprt->last_used + xprt->idle_timeout); 870 xprt->last_used + xprt->idle_timeout);
869 spin_unlock_bh(&xprt->transport_lock); 871 spin_unlock_bh(&xprt->transport_lock);
872 xprt->ops->buf_free(task);
870 task->tk_rqstp = NULL; 873 task->tk_rqstp = NULL;
871 if (req->rq_release_snd_buf) 874 if (req->rq_release_snd_buf)
872 req->rq_release_snd_buf(req); 875 req->rq_release_snd_buf(req);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 77e8800d4127..51f07c9a751b 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -28,6 +28,7 @@
28#include <linux/udp.h> 28#include <linux/udp.h>
29#include <linux/tcp.h> 29#include <linux/tcp.h>
30#include <linux/sunrpc/clnt.h> 30#include <linux/sunrpc/clnt.h>
31#include <linux/sunrpc/sched.h>
31#include <linux/file.h> 32#include <linux/file.h>
32 33
33#include <net/sock.h> 34#include <net/sock.h>
@@ -1161,6 +1162,8 @@ static struct rpc_xprt_ops xs_udp_ops = {
1161 .reserve_xprt = xprt_reserve_xprt_cong, 1162 .reserve_xprt = xprt_reserve_xprt_cong,
1162 .release_xprt = xprt_release_xprt_cong, 1163 .release_xprt = xprt_release_xprt_cong,
1163 .connect = xs_connect, 1164 .connect = xs_connect,
1165 .buf_alloc = rpc_malloc,
1166 .buf_free = rpc_free,
1164 .send_request = xs_udp_send_request, 1167 .send_request = xs_udp_send_request,
1165 .set_retrans_timeout = xprt_set_retrans_timeout_rtt, 1168 .set_retrans_timeout = xprt_set_retrans_timeout_rtt,
1166 .timer = xs_udp_timer, 1169 .timer = xs_udp_timer,
@@ -1173,6 +1176,8 @@ static struct rpc_xprt_ops xs_tcp_ops = {
1173 .reserve_xprt = xprt_reserve_xprt, 1176 .reserve_xprt = xprt_reserve_xprt,
1174 .release_xprt = xprt_release_xprt, 1177 .release_xprt = xprt_release_xprt,
1175 .connect = xs_connect, 1178 .connect = xs_connect,
1179 .buf_alloc = rpc_malloc,
1180 .buf_free = rpc_free,
1176 .send_request = xs_tcp_send_request, 1181 .send_request = xs_tcp_send_request,
1177 .set_retrans_timeout = xprt_set_retrans_timeout_def, 1182 .set_retrans_timeout = xprt_set_retrans_timeout_def,
1178 .close = xs_close, 1183 .close = xs_close,