aboutsummaryrefslogtreecommitdiffstats
path: root/net/sunrpc
diff options
context:
space:
mode:
authorChuck Lever <chuck.lever@oracle.com>2014-05-28 10:33:59 -0400
committerAnna Schumaker <Anna.Schumaker@Netapp.com>2014-06-04 08:56:46 -0400
commit65866f8259851cea5e356d2fd46fc37a4e26330e (patch)
tree03d1ed9471816f6216e48c05f1c41f1ece256432 /net/sunrpc
parent8301a2c047cc25dabd645e5590c1db0ead4c5af4 (diff)
xprtrdma: Reduce the number of hardway buffer allocations
While marshaling an RPC/RDMA request, the inline_{rsize,wsize} settings determine whether an inline request is used, or whether read or write chunks lists are built. The current default value of these settings is 1024. Any RPC request smaller than 1024 bytes is sent to the NFS server completely inline. rpcrdma_buffer_create() allocates and pre-registers a set of RPC buffers for each transport instance, also based on the inline rsize and wsize settings. RPC/RDMA requests and replies are built in these buffers. However, if an RPC/RDMA request is expected to be larger than 1024, a buffer has to be allocated and registered for that RPC, and deregistered and released when the RPC is complete. This is known has a "hardway allocation." Since the introduction of NFSv4, the size of RPC requests has become larger, and hardway allocations are thus more frequent. Hardway allocations are significant overhead, and they waste the existing RPC buffers pre-allocated by rpcrdma_buffer_create(). We'd like fewer hardway allocations. Increasing the size of the pre-registered buffers is the most direct way to do this. However, a blanket increase of the inline thresholds has interoperability consequences. On my 64-bit system, rpcrdma_buffer_create() requests roughly 7000 bytes for each RPC request buffer, using kmalloc(). Due to internal fragmentation, this wastes nearly 1200 bytes because kmalloc() already returns an 8192-byte piece of memory for a 7000-byte allocation request, though the extra space remains unused. So let's round up the size of the pre-allocated buffers, and make use of the unused space in the kmalloc'd memory. This change reduces the amount of hardway allocated memory for an NFSv4 general connectathon run from 1322092 to 9472 bytes (99%). Signed-off-by: Chuck Lever <chuck.lever@oracle.com> Tested-by: Steve Wise <swise@opengridcomputing.com> Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Diffstat (limited to 'net/sunrpc')
-rw-r--r--net/sunrpc/xprtrdma/verbs.c25
1 file changed, 13 insertions, 12 deletions
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 1d083664bfca..c80995af82de 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -50,6 +50,7 @@
50#include <linux/interrupt.h> 50#include <linux/interrupt.h>
51#include <linux/pci.h> /* for Tavor hack below */ 51#include <linux/pci.h> /* for Tavor hack below */
52#include <linux/slab.h> 52#include <linux/slab.h>
53#include <asm/bitops.h>
53 54
54#include "xprt_rdma.h" 55#include "xprt_rdma.h"
55 56
@@ -1005,7 +1006,7 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
1005 struct rpcrdma_ia *ia, struct rpcrdma_create_data_internal *cdata) 1006 struct rpcrdma_ia *ia, struct rpcrdma_create_data_internal *cdata)
1006{ 1007{
1007 char *p; 1008 char *p;
1008 size_t len; 1009 size_t len, rlen, wlen;
1009 int i, rc; 1010 int i, rc;
1010 struct rpcrdma_mw *r; 1011 struct rpcrdma_mw *r;
1011 1012
@@ -1120,16 +1121,16 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
1120 * Allocate/init the request/reply buffers. Doing this 1121 * Allocate/init the request/reply buffers. Doing this
1121 * using kmalloc for now -- one for each buf. 1122 * using kmalloc for now -- one for each buf.
1122 */ 1123 */
1124 wlen = 1 << fls(cdata->inline_wsize + sizeof(struct rpcrdma_req));
1125 rlen = 1 << fls(cdata->inline_rsize + sizeof(struct rpcrdma_rep));
1126 dprintk("RPC: %s: wlen = %zu, rlen = %zu\n",
1127 __func__, wlen, rlen);
1128
1123 for (i = 0; i < buf->rb_max_requests; i++) { 1129 for (i = 0; i < buf->rb_max_requests; i++) {
1124 struct rpcrdma_req *req; 1130 struct rpcrdma_req *req;
1125 struct rpcrdma_rep *rep; 1131 struct rpcrdma_rep *rep;
1126 1132
1127 len = cdata->inline_wsize + sizeof(struct rpcrdma_req); 1133 req = kmalloc(wlen, GFP_KERNEL);
1128 /* RPC layer requests *double* size + 1K RPC_SLACK_SPACE! */
1129 /* Typical ~2400b, so rounding up saves work later */
1130 if (len < 4096)
1131 len = 4096;
1132 req = kmalloc(len, GFP_KERNEL);
1133 if (req == NULL) { 1134 if (req == NULL) {
1134 dprintk("RPC: %s: request buffer %d alloc" 1135 dprintk("RPC: %s: request buffer %d alloc"
1135 " failed\n", __func__, i); 1136 " failed\n", __func__, i);
@@ -1141,16 +1142,16 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
1141 buf->rb_send_bufs[i]->rl_buffer = buf; 1142 buf->rb_send_bufs[i]->rl_buffer = buf;
1142 1143
1143 rc = rpcrdma_register_internal(ia, req->rl_base, 1144 rc = rpcrdma_register_internal(ia, req->rl_base,
1144 len - offsetof(struct rpcrdma_req, rl_base), 1145 wlen - offsetof(struct rpcrdma_req, rl_base),
1145 &buf->rb_send_bufs[i]->rl_handle, 1146 &buf->rb_send_bufs[i]->rl_handle,
1146 &buf->rb_send_bufs[i]->rl_iov); 1147 &buf->rb_send_bufs[i]->rl_iov);
1147 if (rc) 1148 if (rc)
1148 goto out; 1149 goto out;
1149 1150
1150 buf->rb_send_bufs[i]->rl_size = len-sizeof(struct rpcrdma_req); 1151 buf->rb_send_bufs[i]->rl_size = wlen -
1152 sizeof(struct rpcrdma_req);
1151 1153
1152 len = cdata->inline_rsize + sizeof(struct rpcrdma_rep); 1154 rep = kmalloc(rlen, GFP_KERNEL);
1153 rep = kmalloc(len, GFP_KERNEL);
1154 if (rep == NULL) { 1155 if (rep == NULL) {
1155 dprintk("RPC: %s: reply buffer %d alloc failed\n", 1156 dprintk("RPC: %s: reply buffer %d alloc failed\n",
1156 __func__, i); 1157 __func__, i);
@@ -1162,7 +1163,7 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
1162 buf->rb_recv_bufs[i]->rr_buffer = buf; 1163 buf->rb_recv_bufs[i]->rr_buffer = buf;
1163 1164
1164 rc = rpcrdma_register_internal(ia, rep->rr_base, 1165 rc = rpcrdma_register_internal(ia, rep->rr_base,
1165 len - offsetof(struct rpcrdma_rep, rr_base), 1166 rlen - offsetof(struct rpcrdma_rep, rr_base),
1166 &buf->rb_recv_bufs[i]->rr_handle, 1167 &buf->rb_recv_bufs[i]->rr_handle,
1167 &buf->rb_recv_bufs[i]->rr_iov); 1168 &buf->rb_recv_bufs[i]->rr_iov);
1168 if (rc) 1169 if (rc)