diff options
author | Chuck Lever <chuck.lever@oracle.com> | 2014-07-29 17:25:38 -0400 |
---|---|---|
committer | Anna Schumaker <Anna.Schumaker@Netapp.com> | 2014-07-31 16:22:57 -0400 |
commit | 2e84522c2e0323a090fe1f7eeed6d5b6a68efe5f (patch) | |
tree | 542a04ce4fa4cc1312a76371282ddb6d9ebcf5fe /net/sunrpc/xprtrdma | |
parent | f590e878c52c38046fd7cfa5a742ddae68717484 (diff) |
xprtrdma: Allocate each struct rpcrdma_mw separately
Currently rpcrdma_buffer_create() allocates struct rpcrdma_mw's as
a single contiguous area of memory. It amounts to quite a bit of
memory, and there's no requirement for these to be carved from a
single piece of contiguous memory.
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Tested-by: Steve Wise <swise@opengridcomputing.com>
Tested-by: Shirley Ma <shirley.ma@oracle.com>
Tested-by: Devesh Sharma <devesh.sharma@emulex.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Diffstat (limited to 'net/sunrpc/xprtrdma')
-rw-r--r-- | net/sunrpc/xprtrdma/verbs.c | 242 |
1 file changed, 143 insertions, 99 deletions
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 80c01638a66b..31c4fd36d62c 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c | |||
@@ -1005,9 +1005,91 @@ rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) | |||
1005 | return rc; | 1005 | return rc; |
1006 | } | 1006 | } |
1007 | 1007 | ||
1008 | /* | 1008 | static int |
1009 | * Initialize buffer memory | 1009 | rpcrdma_init_fmrs(struct rpcrdma_ia *ia, struct rpcrdma_buffer *buf) |
1010 | */ | 1010 | { |
1011 | int mr_access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ; | ||
1012 | struct ib_fmr_attr fmr_attr = { | ||
1013 | .max_pages = RPCRDMA_MAX_DATA_SEGS, | ||
1014 | .max_maps = 1, | ||
1015 | .page_shift = PAGE_SHIFT | ||
1016 | }; | ||
1017 | struct rpcrdma_mw *r; | ||
1018 | int i, rc; | ||
1019 | |||
1020 | i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS; | ||
1021 | dprintk("RPC: %s: initalizing %d FMRs\n", __func__, i); | ||
1022 | |||
1023 | while (i--) { | ||
1024 | r = kzalloc(sizeof(*r), GFP_KERNEL); | ||
1025 | if (r == NULL) | ||
1026 | return -ENOMEM; | ||
1027 | |||
1028 | r->r.fmr = ib_alloc_fmr(ia->ri_pd, mr_access_flags, &fmr_attr); | ||
1029 | if (IS_ERR(r->r.fmr)) { | ||
1030 | rc = PTR_ERR(r->r.fmr); | ||
1031 | dprintk("RPC: %s: ib_alloc_fmr failed %i\n", | ||
1032 | __func__, rc); | ||
1033 | goto out_free; | ||
1034 | } | ||
1035 | |||
1036 | list_add(&r->mw_list, &buf->rb_mws); | ||
1037 | list_add(&r->mw_all, &buf->rb_all); | ||
1038 | } | ||
1039 | return 0; | ||
1040 | |||
1041 | out_free: | ||
1042 | kfree(r); | ||
1043 | return rc; | ||
1044 | } | ||
1045 | |||
1046 | static int | ||
1047 | rpcrdma_init_frmrs(struct rpcrdma_ia *ia, struct rpcrdma_buffer *buf) | ||
1048 | { | ||
1049 | struct rpcrdma_frmr *f; | ||
1050 | struct rpcrdma_mw *r; | ||
1051 | int i, rc; | ||
1052 | |||
1053 | i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS; | ||
1054 | dprintk("RPC: %s: initalizing %d FRMRs\n", __func__, i); | ||
1055 | |||
1056 | while (i--) { | ||
1057 | r = kzalloc(sizeof(*r), GFP_KERNEL); | ||
1058 | if (r == NULL) | ||
1059 | return -ENOMEM; | ||
1060 | f = &r->r.frmr; | ||
1061 | |||
1062 | f->fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd, | ||
1063 | ia->ri_max_frmr_depth); | ||
1064 | if (IS_ERR(f->fr_mr)) { | ||
1065 | rc = PTR_ERR(f->fr_mr); | ||
1066 | dprintk("RPC: %s: ib_alloc_fast_reg_mr " | ||
1067 | "failed %i\n", __func__, rc); | ||
1068 | goto out_free; | ||
1069 | } | ||
1070 | |||
1071 | f->fr_pgl = ib_alloc_fast_reg_page_list(ia->ri_id->device, | ||
1072 | ia->ri_max_frmr_depth); | ||
1073 | if (IS_ERR(f->fr_pgl)) { | ||
1074 | rc = PTR_ERR(f->fr_pgl); | ||
1075 | dprintk("RPC: %s: ib_alloc_fast_reg_page_list " | ||
1076 | "failed %i\n", __func__, rc); | ||
1077 | |||
1078 | ib_dereg_mr(f->fr_mr); | ||
1079 | goto out_free; | ||
1080 | } | ||
1081 | |||
1082 | list_add(&r->mw_list, &buf->rb_mws); | ||
1083 | list_add(&r->mw_all, &buf->rb_all); | ||
1084 | } | ||
1085 | |||
1086 | return 0; | ||
1087 | |||
1088 | out_free: | ||
1089 | kfree(r); | ||
1090 | return rc; | ||
1091 | } | ||
1092 | |||
1011 | int | 1093 | int |
1012 | rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, | 1094 | rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, |
1013 | struct rpcrdma_ia *ia, struct rpcrdma_create_data_internal *cdata) | 1095 | struct rpcrdma_ia *ia, struct rpcrdma_create_data_internal *cdata) |
@@ -1015,7 +1097,6 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, | |||
1015 | char *p; | 1097 | char *p; |
1016 | size_t len, rlen, wlen; | 1098 | size_t len, rlen, wlen; |
1017 | int i, rc; | 1099 | int i, rc; |
1018 | struct rpcrdma_mw *r; | ||
1019 | 1100 | ||
1020 | buf->rb_max_requests = cdata->max_requests; | 1101 | buf->rb_max_requests = cdata->max_requests; |
1021 | spin_lock_init(&buf->rb_lock); | 1102 | spin_lock_init(&buf->rb_lock); |
@@ -1026,28 +1107,12 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, | |||
1026 | * 2. arrays of struct rpcrdma_req to fill in pointers | 1107 | * 2. arrays of struct rpcrdma_req to fill in pointers |
1027 | * 3. array of struct rpcrdma_rep for replies | 1108 | * 3. array of struct rpcrdma_rep for replies |
1028 | * 4. padding, if any | 1109 | * 4. padding, if any |
1029 | * 5. mw's, fmr's or frmr's, if any | ||
1030 | * Send/recv buffers in req/rep need to be registered | 1110 | * Send/recv buffers in req/rep need to be registered |
1031 | */ | 1111 | */ |
1032 | |||
1033 | len = buf->rb_max_requests * | 1112 | len = buf->rb_max_requests * |
1034 | (sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *)); | 1113 | (sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *)); |
1035 | len += cdata->padding; | 1114 | len += cdata->padding; |
1036 | switch (ia->ri_memreg_strategy) { | ||
1037 | case RPCRDMA_FRMR: | ||
1038 | len += buf->rb_max_requests * RPCRDMA_MAX_SEGS * | ||
1039 | sizeof(struct rpcrdma_mw); | ||
1040 | break; | ||
1041 | case RPCRDMA_MTHCAFMR: | ||
1042 | /* TBD we are perhaps overallocating here */ | ||
1043 | len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS * | ||
1044 | sizeof(struct rpcrdma_mw); | ||
1045 | break; | ||
1046 | default: | ||
1047 | break; | ||
1048 | } | ||
1049 | 1115 | ||
1050 | /* allocate 1, 4 and 5 in one shot */ | ||
1051 | p = kzalloc(len, GFP_KERNEL); | 1116 | p = kzalloc(len, GFP_KERNEL); |
1052 | if (p == NULL) { | 1117 | if (p == NULL) { |
1053 | dprintk("RPC: %s: req_t/rep_t/pad kzalloc(%zd) failed\n", | 1118 | dprintk("RPC: %s: req_t/rep_t/pad kzalloc(%zd) failed\n", |
@@ -1075,53 +1140,16 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, | |||
1075 | 1140 | ||
1076 | INIT_LIST_HEAD(&buf->rb_mws); | 1141 | INIT_LIST_HEAD(&buf->rb_mws); |
1077 | INIT_LIST_HEAD(&buf->rb_all); | 1142 | INIT_LIST_HEAD(&buf->rb_all); |
1078 | r = (struct rpcrdma_mw *)p; | ||
1079 | switch (ia->ri_memreg_strategy) { | 1143 | switch (ia->ri_memreg_strategy) { |
1080 | case RPCRDMA_FRMR: | 1144 | case RPCRDMA_FRMR: |
1081 | for (i = buf->rb_max_requests * RPCRDMA_MAX_SEGS; i; i--) { | 1145 | rc = rpcrdma_init_frmrs(ia, buf); |
1082 | r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd, | 1146 | if (rc) |
1083 | ia->ri_max_frmr_depth); | 1147 | goto out; |
1084 | if (IS_ERR(r->r.frmr.fr_mr)) { | ||
1085 | rc = PTR_ERR(r->r.frmr.fr_mr); | ||
1086 | dprintk("RPC: %s: ib_alloc_fast_reg_mr" | ||
1087 | " failed %i\n", __func__, rc); | ||
1088 | goto out; | ||
1089 | } | ||
1090 | r->r.frmr.fr_pgl = ib_alloc_fast_reg_page_list( | ||
1091 | ia->ri_id->device, | ||
1092 | ia->ri_max_frmr_depth); | ||
1093 | if (IS_ERR(r->r.frmr.fr_pgl)) { | ||
1094 | rc = PTR_ERR(r->r.frmr.fr_pgl); | ||
1095 | dprintk("RPC: %s: " | ||
1096 | "ib_alloc_fast_reg_page_list " | ||
1097 | "failed %i\n", __func__, rc); | ||
1098 | |||
1099 | ib_dereg_mr(r->r.frmr.fr_mr); | ||
1100 | goto out; | ||
1101 | } | ||
1102 | list_add(&r->mw_all, &buf->rb_all); | ||
1103 | list_add(&r->mw_list, &buf->rb_mws); | ||
1104 | ++r; | ||
1105 | } | ||
1106 | break; | 1148 | break; |
1107 | case RPCRDMA_MTHCAFMR: | 1149 | case RPCRDMA_MTHCAFMR: |
1108 | /* TBD we are perhaps overallocating here */ | 1150 | rc = rpcrdma_init_fmrs(ia, buf); |
1109 | for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) { | 1151 | if (rc) |
1110 | static struct ib_fmr_attr fa = | 1152 | goto out; |
1111 | { RPCRDMA_MAX_DATA_SEGS, 1, PAGE_SHIFT }; | ||
1112 | r->r.fmr = ib_alloc_fmr(ia->ri_pd, | ||
1113 | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ, | ||
1114 | &fa); | ||
1115 | if (IS_ERR(r->r.fmr)) { | ||
1116 | rc = PTR_ERR(r->r.fmr); | ||
1117 | dprintk("RPC: %s: ib_alloc_fmr" | ||
1118 | " failed %i\n", __func__, rc); | ||
1119 | goto out; | ||
1120 | } | ||
1121 | list_add(&r->mw_all, &buf->rb_all); | ||
1122 | list_add(&r->mw_list, &buf->rb_mws); | ||
1123 | ++r; | ||
1124 | } | ||
1125 | break; | 1153 | break; |
1126 | default: | 1154 | default: |
1127 | break; | 1155 | break; |
@@ -1189,24 +1217,57 @@ out: | |||
1189 | return rc; | 1217 | return rc; |
1190 | } | 1218 | } |
1191 | 1219 | ||
1192 | /* | 1220 | static void |
1193 | * Unregister and destroy buffer memory. Need to deal with | 1221 | rpcrdma_destroy_fmrs(struct rpcrdma_buffer *buf) |
1194 | * partial initialization, so it's callable from failed create. | 1222 | { |
1195 | * Must be called before destroying endpoint, as registrations | 1223 | struct rpcrdma_mw *r; |
1196 | * reference it. | 1224 | int rc; |
1197 | */ | 1225 | |
1226 | while (!list_empty(&buf->rb_all)) { | ||
1227 | r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all); | ||
1228 | list_del(&r->mw_all); | ||
1229 | list_del(&r->mw_list); | ||
1230 | |||
1231 | rc = ib_dealloc_fmr(r->r.fmr); | ||
1232 | if (rc) | ||
1233 | dprintk("RPC: %s: ib_dealloc_fmr failed %i\n", | ||
1234 | __func__, rc); | ||
1235 | |||
1236 | kfree(r); | ||
1237 | } | ||
1238 | } | ||
1239 | |||
1240 | static void | ||
1241 | rpcrdma_destroy_frmrs(struct rpcrdma_buffer *buf) | ||
1242 | { | ||
1243 | struct rpcrdma_mw *r; | ||
1244 | int rc; | ||
1245 | |||
1246 | while (!list_empty(&buf->rb_all)) { | ||
1247 | r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all); | ||
1248 | list_del(&r->mw_all); | ||
1249 | list_del(&r->mw_list); | ||
1250 | |||
1251 | rc = ib_dereg_mr(r->r.frmr.fr_mr); | ||
1252 | if (rc) | ||
1253 | dprintk("RPC: %s: ib_dereg_mr failed %i\n", | ||
1254 | __func__, rc); | ||
1255 | ib_free_fast_reg_page_list(r->r.frmr.fr_pgl); | ||
1256 | |||
1257 | kfree(r); | ||
1258 | } | ||
1259 | } | ||
1260 | |||
1198 | void | 1261 | void |
1199 | rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) | 1262 | rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) |
1200 | { | 1263 | { |
1201 | int rc, i; | ||
1202 | struct rpcrdma_ia *ia = rdmab_to_ia(buf); | 1264 | struct rpcrdma_ia *ia = rdmab_to_ia(buf); |
1203 | struct rpcrdma_mw *r; | 1265 | int i; |
1204 | 1266 | ||
1205 | /* clean up in reverse order from create | 1267 | /* clean up in reverse order from create |
1206 | * 1. recv mr memory (mr free, then kfree) | 1268 | * 1. recv mr memory (mr free, then kfree) |
1207 | * 2. send mr memory (mr free, then kfree) | 1269 | * 2. send mr memory (mr free, then kfree) |
1208 | * 3. padding (if any) [moved to rpcrdma_ep_destroy] | 1270 | * 3. MWs |
1209 | * 4. arrays | ||
1210 | */ | 1271 | */ |
1211 | dprintk("RPC: %s: entering\n", __func__); | 1272 | dprintk("RPC: %s: entering\n", __func__); |
1212 | 1273 | ||
@@ -1225,32 +1286,15 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) | |||
1225 | } | 1286 | } |
1226 | } | 1287 | } |
1227 | 1288 | ||
1228 | while (!list_empty(&buf->rb_mws)) { | 1289 | switch (ia->ri_memreg_strategy) { |
1229 | r = list_entry(buf->rb_mws.next, | 1290 | case RPCRDMA_FRMR: |
1230 | struct rpcrdma_mw, mw_list); | 1291 | rpcrdma_destroy_frmrs(buf); |
1231 | list_del(&r->mw_all); | 1292 | break; |
1232 | list_del(&r->mw_list); | 1293 | case RPCRDMA_MTHCAFMR: |
1233 | switch (ia->ri_memreg_strategy) { | 1294 | rpcrdma_destroy_fmrs(buf); |
1234 | case RPCRDMA_FRMR: | 1295 | break; |
1235 | rc = ib_dereg_mr(r->r.frmr.fr_mr); | 1296 | default: |
1236 | if (rc) | 1297 | break; |
1237 | dprintk("RPC: %s:" | ||
1238 | " ib_dereg_mr" | ||
1239 | " failed %i\n", | ||
1240 | __func__, rc); | ||
1241 | ib_free_fast_reg_page_list(r->r.frmr.fr_pgl); | ||
1242 | break; | ||
1243 | case RPCRDMA_MTHCAFMR: | ||
1244 | rc = ib_dealloc_fmr(r->r.fmr); | ||
1245 | if (rc) | ||
1246 | dprintk("RPC: %s:" | ||
1247 | " ib_dealloc_fmr" | ||
1248 | " failed %i\n", | ||
1249 | __func__, rc); | ||
1250 | break; | ||
1251 | default: | ||
1252 | break; | ||
1253 | } | ||
1254 | } | 1298 | } |
1255 | 1299 | ||
1256 | kfree(buf->rb_pool); | 1300 | kfree(buf->rb_pool); |