diff options
author | Chuck Lever <cel@netapp.com> | 2006-01-03 03:55:49 -0500 |
---|---|---|
committer | Trond Myklebust <Trond.Myklebust@netapp.com> | 2006-01-06 14:58:55 -0500 |
commit | 02107148349f31eee7c0fb06fd7a880df73dbd20 (patch) | |
tree | 37bffd81e08b8e50394ce89a1aa7a3961f0ffbe7 | |
parent | 03c21733938aad0758f5f88e1cc7ede69fc3c910 (diff) |
SUNRPC: switchable buffer allocation
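Extend the RPC client transport switch so that buffer management can be
replaced on a per-transport basis. Two new methods, buf_alloc and buf_free,
are added to struct rpc_xprt_ops; the UDP and TCP socket transports
implement them with rpc_malloc and rpc_free.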
In the current IPv4 socket transport implementation, RPC buffers are
allocated as needed for each RPC message that is sent. Some transport
implementations, however, may choose to use pre-allocated buffers for
encoding, sending, receiving, and unmarshalling RPC messages. For
transports capable of direct data placement, the buffers can be carved
out of a pre-registered area of memory rather than from a slab cache.
Test-plan:
Millions of fsx operations. Performance characterization with "sio" and
"iozone". Use oprofile and other tools to look for significant regression
in CPU utilization.
Signed-off-by: Chuck Lever <cel@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
-rw-r--r-- | include/linux/sunrpc/sched.h | 3 | ||||
-rw-r--r-- | include/linux/sunrpc/xprt.h | 10 | ||||
-rw-r--r-- | net/sunrpc/clnt.c | 14 | ||||
-rw-r--r-- | net/sunrpc/sched.c | 50 | ||||
-rw-r--r-- | net/sunrpc/xprt.c | 3 | ||||
-rw-r--r-- | net/sunrpc/xprtsock.c | 5 |
6 files changed, 49 insertions, 36 deletions
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 94b0afa4ab05..8b25629accd8 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h | |||
@@ -52,8 +52,6 @@ struct rpc_task { | |||
52 | * RPC call state | 52 | * RPC call state |
53 | */ | 53 | */ |
54 | struct rpc_message tk_msg; /* RPC call info */ | 54 | struct rpc_message tk_msg; /* RPC call info */ |
55 | __u32 * tk_buffer; /* XDR buffer */ | ||
56 | size_t tk_bufsize; | ||
57 | __u8 tk_garb_retry; | 55 | __u8 tk_garb_retry; |
58 | __u8 tk_cred_retry; | 56 | __u8 tk_cred_retry; |
59 | 57 | ||
@@ -268,6 +266,7 @@ struct rpc_task *rpc_wake_up_next(struct rpc_wait_queue *); | |||
268 | void rpc_wake_up_status(struct rpc_wait_queue *, int); | 266 | void rpc_wake_up_status(struct rpc_wait_queue *, int); |
269 | void rpc_delay(struct rpc_task *, unsigned long); | 267 | void rpc_delay(struct rpc_task *, unsigned long); |
270 | void * rpc_malloc(struct rpc_task *, size_t); | 268 | void * rpc_malloc(struct rpc_task *, size_t); |
269 | void rpc_free(struct rpc_task *); | ||
271 | int rpciod_up(void); | 270 | int rpciod_up(void); |
272 | void rpciod_down(void); | 271 | void rpciod_down(void); |
273 | void rpciod_wake_up(void); | 272 | void rpciod_wake_up(void); |
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 3b8b6e823c70..7885b9621ce3 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h | |||
@@ -79,21 +79,19 @@ struct rpc_rqst { | |||
79 | void (*rq_release_snd_buf)(struct rpc_rqst *); /* release rq_enc_pages */ | 79 | void (*rq_release_snd_buf)(struct rpc_rqst *); /* release rq_enc_pages */ |
80 | struct list_head rq_list; | 80 | struct list_head rq_list; |
81 | 81 | ||
82 | __u32 * rq_buffer; /* XDR encode buffer */ | ||
83 | size_t rq_bufsize; | ||
84 | |||
82 | struct xdr_buf rq_private_buf; /* The receive buffer | 85 | struct xdr_buf rq_private_buf; /* The receive buffer |
83 | * used in the softirq. | 86 | * used in the softirq. |
84 | */ | 87 | */ |
85 | unsigned long rq_majortimeo; /* major timeout alarm */ | 88 | unsigned long rq_majortimeo; /* major timeout alarm */ |
86 | unsigned long rq_timeout; /* Current timeout value */ | 89 | unsigned long rq_timeout; /* Current timeout value */ |
87 | unsigned int rq_retries; /* # of retries */ | 90 | unsigned int rq_retries; /* # of retries */ |
88 | /* | ||
89 | * For authentication (e.g. auth_des) | ||
90 | */ | ||
91 | u32 rq_creddata[2]; | ||
92 | 91 | ||
93 | /* | 92 | /* |
94 | * Partial send handling | 93 | * Partial send handling |
95 | */ | 94 | */ |
96 | |||
97 | u32 rq_bytes_sent; /* Bytes we have sent */ | 95 | u32 rq_bytes_sent; /* Bytes we have sent */ |
98 | 96 | ||
99 | unsigned long rq_xtime; /* when transmitted */ | 97 | unsigned long rq_xtime; /* when transmitted */ |
@@ -107,6 +105,8 @@ struct rpc_xprt_ops { | |||
107 | int (*reserve_xprt)(struct rpc_task *task); | 105 | int (*reserve_xprt)(struct rpc_task *task); |
108 | void (*release_xprt)(struct rpc_xprt *xprt, struct rpc_task *task); | 106 | void (*release_xprt)(struct rpc_xprt *xprt, struct rpc_task *task); |
109 | void (*connect)(struct rpc_task *task); | 107 | void (*connect)(struct rpc_task *task); |
108 | void * (*buf_alloc)(struct rpc_task *task, size_t size); | ||
109 | void (*buf_free)(struct rpc_task *task); | ||
110 | int (*send_request)(struct rpc_task *task); | 110 | int (*send_request)(struct rpc_task *task); |
111 | void (*set_retrans_timeout)(struct rpc_task *task); | 111 | void (*set_retrans_timeout)(struct rpc_task *task); |
112 | void (*timer)(struct rpc_task *task); | 112 | void (*timer)(struct rpc_task *task); |
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index b23c0d328c9c..25cba94c5683 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c | |||
@@ -644,24 +644,26 @@ call_reserveresult(struct rpc_task *task) | |||
644 | 644 | ||
645 | /* | 645 | /* |
646 | * 2. Allocate the buffer. For details, see sched.c:rpc_malloc. | 646 | * 2. Allocate the buffer. For details, see sched.c:rpc_malloc. |
647 | * (Note: buffer memory is freed in rpc_task_release). | 647 | * (Note: buffer memory is freed in xprt_release). |
648 | */ | 648 | */ |
649 | static void | 649 | static void |
650 | call_allocate(struct rpc_task *task) | 650 | call_allocate(struct rpc_task *task) |
651 | { | 651 | { |
652 | struct rpc_rqst *req = task->tk_rqstp; | ||
653 | struct rpc_xprt *xprt = task->tk_xprt; | ||
652 | unsigned int bufsiz; | 654 | unsigned int bufsiz; |
653 | 655 | ||
654 | dprintk("RPC: %4d call_allocate (status %d)\n", | 656 | dprintk("RPC: %4d call_allocate (status %d)\n", |
655 | task->tk_pid, task->tk_status); | 657 | task->tk_pid, task->tk_status); |
656 | task->tk_action = call_bind; | 658 | task->tk_action = call_bind; |
657 | if (task->tk_buffer) | 659 | if (req->rq_buffer) |
658 | return; | 660 | return; |
659 | 661 | ||
660 | /* FIXME: compute buffer requirements more exactly using | 662 | /* FIXME: compute buffer requirements more exactly using |
661 | * auth->au_wslack */ | 663 | * auth->au_wslack */ |
662 | bufsiz = task->tk_msg.rpc_proc->p_bufsiz + RPC_SLACK_SPACE; | 664 | bufsiz = task->tk_msg.rpc_proc->p_bufsiz + RPC_SLACK_SPACE; |
663 | 665 | ||
664 | if (rpc_malloc(task, bufsiz << 1) != NULL) | 666 | if (xprt->ops->buf_alloc(task, bufsiz << 1) != NULL) |
665 | return; | 667 | return; |
666 | printk(KERN_INFO "RPC: buffer allocation failed for task %p\n", task); | 668 | printk(KERN_INFO "RPC: buffer allocation failed for task %p\n", task); |
667 | 669 | ||
@@ -704,14 +706,14 @@ call_encode(struct rpc_task *task) | |||
704 | task->tk_pid, task->tk_status); | 706 | task->tk_pid, task->tk_status); |
705 | 707 | ||
706 | /* Default buffer setup */ | 708 | /* Default buffer setup */ |
707 | bufsiz = task->tk_bufsize >> 1; | 709 | bufsiz = req->rq_bufsize >> 1; |
708 | sndbuf->head[0].iov_base = (void *)task->tk_buffer; | 710 | sndbuf->head[0].iov_base = (void *)req->rq_buffer; |
709 | sndbuf->head[0].iov_len = bufsiz; | 711 | sndbuf->head[0].iov_len = bufsiz; |
710 | sndbuf->tail[0].iov_len = 0; | 712 | sndbuf->tail[0].iov_len = 0; |
711 | sndbuf->page_len = 0; | 713 | sndbuf->page_len = 0; |
712 | sndbuf->len = 0; | 714 | sndbuf->len = 0; |
713 | sndbuf->buflen = bufsiz; | 715 | sndbuf->buflen = bufsiz; |
714 | rcvbuf->head[0].iov_base = (void *)((char *)task->tk_buffer + bufsiz); | 716 | rcvbuf->head[0].iov_base = (void *)((char *)req->rq_buffer + bufsiz); |
715 | rcvbuf->head[0].iov_len = bufsiz; | 717 | rcvbuf->head[0].iov_len = bufsiz; |
716 | rcvbuf->tail[0].iov_len = 0; | 718 | rcvbuf->tail[0].iov_len = 0; |
717 | rcvbuf->page_len = 0; | 719 | rcvbuf->page_len = 0; |
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 48510e3ffa02..7415406aa1ae 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c | |||
@@ -41,8 +41,6 @@ static mempool_t *rpc_buffer_mempool __read_mostly; | |||
41 | 41 | ||
42 | static void __rpc_default_timer(struct rpc_task *task); | 42 | static void __rpc_default_timer(struct rpc_task *task); |
43 | static void rpciod_killall(void); | 43 | static void rpciod_killall(void); |
44 | static void rpc_free(struct rpc_task *task); | ||
45 | |||
46 | static void rpc_async_schedule(void *); | 44 | static void rpc_async_schedule(void *); |
47 | 45 | ||
48 | /* | 46 | /* |
@@ -599,7 +597,6 @@ void rpc_exit_task(struct rpc_task *task) | |||
599 | WARN_ON(RPC_ASSASSINATED(task)); | 597 | WARN_ON(RPC_ASSASSINATED(task)); |
600 | /* Always release the RPC slot and buffer memory */ | 598 | /* Always release the RPC slot and buffer memory */ |
601 | xprt_release(task); | 599 | xprt_release(task); |
602 | rpc_free(task); | ||
603 | } | 600 | } |
604 | } | 601 | } |
605 | } | 602 | } |
@@ -724,17 +721,19 @@ static void rpc_async_schedule(void *arg) | |||
724 | __rpc_execute((struct rpc_task *)arg); | 721 | __rpc_execute((struct rpc_task *)arg); |
725 | } | 722 | } |
726 | 723 | ||
727 | /* | 724 | /** |
728 | * Allocate memory for RPC purposes. | 725 | * rpc_malloc - allocate an RPC buffer |
726 | * @task: RPC task that will use this buffer | ||
727 | * @size: requested byte size | ||
729 | * | 728 | * |
730 | * We try to ensure that some NFS reads and writes can always proceed | 729 | * We try to ensure that some NFS reads and writes can always proceed |
731 | * by using a mempool when allocating 'small' buffers. | 730 | * by using a mempool when allocating 'small' buffers. |
732 | * In order to avoid memory starvation triggering more writebacks of | 731 | * In order to avoid memory starvation triggering more writebacks of |
733 | * NFS requests, we use GFP_NOFS rather than GFP_KERNEL. | 732 | * NFS requests, we use GFP_NOFS rather than GFP_KERNEL. |
734 | */ | 733 | */ |
735 | void * | 734 | void * rpc_malloc(struct rpc_task *task, size_t size) |
736 | rpc_malloc(struct rpc_task *task, size_t size) | ||
737 | { | 735 | { |
736 | struct rpc_rqst *req = task->tk_rqstp; | ||
738 | gfp_t gfp; | 737 | gfp_t gfp; |
739 | 738 | ||
740 | if (task->tk_flags & RPC_TASK_SWAPPER) | 739 | if (task->tk_flags & RPC_TASK_SWAPPER) |
@@ -743,27 +742,33 @@ rpc_malloc(struct rpc_task *task, size_t size) | |||
743 | gfp = GFP_NOFS; | 742 | gfp = GFP_NOFS; |
744 | 743 | ||
745 | if (size > RPC_BUFFER_MAXSIZE) { | 744 | if (size > RPC_BUFFER_MAXSIZE) { |
746 | task->tk_buffer = kmalloc(size, gfp); | 745 | req->rq_buffer = kmalloc(size, gfp); |
747 | if (task->tk_buffer) | 746 | if (req->rq_buffer) |
748 | task->tk_bufsize = size; | 747 | req->rq_bufsize = size; |
749 | } else { | 748 | } else { |
750 | task->tk_buffer = mempool_alloc(rpc_buffer_mempool, gfp); | 749 | req->rq_buffer = mempool_alloc(rpc_buffer_mempool, gfp); |
751 | if (task->tk_buffer) | 750 | if (req->rq_buffer) |
752 | task->tk_bufsize = RPC_BUFFER_MAXSIZE; | 751 | req->rq_bufsize = RPC_BUFFER_MAXSIZE; |
753 | } | 752 | } |
754 | return task->tk_buffer; | 753 | return req->rq_buffer; |
755 | } | 754 | } |
756 | 755 | ||
757 | static void | 756 | /** |
758 | rpc_free(struct rpc_task *task) | 757 | * rpc_free - free buffer allocated via rpc_malloc |
758 | * @task: RPC task with a buffer to be freed | ||
759 | * | ||
760 | */ | ||
761 | void rpc_free(struct rpc_task *task) | ||
759 | { | 762 | { |
760 | if (task->tk_buffer) { | 763 | struct rpc_rqst *req = task->tk_rqstp; |
761 | if (task->tk_bufsize == RPC_BUFFER_MAXSIZE) | 764 | |
762 | mempool_free(task->tk_buffer, rpc_buffer_mempool); | 765 | if (req->rq_buffer) { |
766 | if (req->rq_bufsize == RPC_BUFFER_MAXSIZE) | ||
767 | mempool_free(req->rq_buffer, rpc_buffer_mempool); | ||
763 | else | 768 | else |
764 | kfree(task->tk_buffer); | 769 | kfree(req->rq_buffer); |
765 | task->tk_buffer = NULL; | 770 | req->rq_buffer = NULL; |
766 | task->tk_bufsize = 0; | 771 | req->rq_bufsize = 0; |
767 | } | 772 | } |
768 | } | 773 | } |
769 | 774 | ||
@@ -887,7 +892,6 @@ void rpc_release_task(struct rpc_task *task) | |||
887 | xprt_release(task); | 892 | xprt_release(task); |
888 | if (task->tk_msg.rpc_cred) | 893 | if (task->tk_msg.rpc_cred) |
889 | rpcauth_unbindcred(task); | 894 | rpcauth_unbindcred(task); |
890 | rpc_free(task); | ||
891 | if (task->tk_client) { | 895 | if (task->tk_client) { |
892 | rpc_release_client(task->tk_client); | 896 | rpc_release_client(task->tk_client); |
893 | task->tk_client = NULL; | 897 | task->tk_client = NULL; |
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 6dda3860351f..069a6cbd49ea 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c | |||
@@ -838,6 +838,8 @@ static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt) | |||
838 | req->rq_timeout = xprt->timeout.to_initval; | 838 | req->rq_timeout = xprt->timeout.to_initval; |
839 | req->rq_task = task; | 839 | req->rq_task = task; |
840 | req->rq_xprt = xprt; | 840 | req->rq_xprt = xprt; |
841 | req->rq_buffer = NULL; | ||
842 | req->rq_bufsize = 0; | ||
841 | req->rq_xid = xprt_alloc_xid(xprt); | 843 | req->rq_xid = xprt_alloc_xid(xprt); |
842 | req->rq_release_snd_buf = NULL; | 844 | req->rq_release_snd_buf = NULL; |
843 | dprintk("RPC: %4d reserved req %p xid %08x\n", task->tk_pid, | 845 | dprintk("RPC: %4d reserved req %p xid %08x\n", task->tk_pid, |
@@ -867,6 +869,7 @@ void xprt_release(struct rpc_task *task) | |||
867 | mod_timer(&xprt->timer, | 869 | mod_timer(&xprt->timer, |
868 | xprt->last_used + xprt->idle_timeout); | 870 | xprt->last_used + xprt->idle_timeout); |
869 | spin_unlock_bh(&xprt->transport_lock); | 871 | spin_unlock_bh(&xprt->transport_lock); |
872 | xprt->ops->buf_free(task); | ||
870 | task->tk_rqstp = NULL; | 873 | task->tk_rqstp = NULL; |
871 | if (req->rq_release_snd_buf) | 874 | if (req->rq_release_snd_buf) |
872 | req->rq_release_snd_buf(req); | 875 | req->rq_release_snd_buf(req); |
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 77e8800d4127..51f07c9a751b 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c | |||
@@ -28,6 +28,7 @@ | |||
28 | #include <linux/udp.h> | 28 | #include <linux/udp.h> |
29 | #include <linux/tcp.h> | 29 | #include <linux/tcp.h> |
30 | #include <linux/sunrpc/clnt.h> | 30 | #include <linux/sunrpc/clnt.h> |
31 | #include <linux/sunrpc/sched.h> | ||
31 | #include <linux/file.h> | 32 | #include <linux/file.h> |
32 | 33 | ||
33 | #include <net/sock.h> | 34 | #include <net/sock.h> |
@@ -1161,6 +1162,8 @@ static struct rpc_xprt_ops xs_udp_ops = { | |||
1161 | .reserve_xprt = xprt_reserve_xprt_cong, | 1162 | .reserve_xprt = xprt_reserve_xprt_cong, |
1162 | .release_xprt = xprt_release_xprt_cong, | 1163 | .release_xprt = xprt_release_xprt_cong, |
1163 | .connect = xs_connect, | 1164 | .connect = xs_connect, |
1165 | .buf_alloc = rpc_malloc, | ||
1166 | .buf_free = rpc_free, | ||
1164 | .send_request = xs_udp_send_request, | 1167 | .send_request = xs_udp_send_request, |
1165 | .set_retrans_timeout = xprt_set_retrans_timeout_rtt, | 1168 | .set_retrans_timeout = xprt_set_retrans_timeout_rtt, |
1166 | .timer = xs_udp_timer, | 1169 | .timer = xs_udp_timer, |
@@ -1173,6 +1176,8 @@ static struct rpc_xprt_ops xs_tcp_ops = { | |||
1173 | .reserve_xprt = xprt_reserve_xprt, | 1176 | .reserve_xprt = xprt_reserve_xprt, |
1174 | .release_xprt = xprt_release_xprt, | 1177 | .release_xprt = xprt_release_xprt, |
1175 | .connect = xs_connect, | 1178 | .connect = xs_connect, |
1179 | .buf_alloc = rpc_malloc, | ||
1180 | .buf_free = rpc_free, | ||
1176 | .send_request = xs_tcp_send_request, | 1181 | .send_request = xs_tcp_send_request, |
1177 | .set_retrans_timeout = xprt_set_retrans_timeout_def, | 1182 | .set_retrans_timeout = xprt_set_retrans_timeout_def, |
1178 | .close = xs_close, | 1183 | .close = xs_close, |