diff options
Diffstat (limited to 'fs/nfs/pnfs.c')
-rw-r--r-- | fs/nfs/pnfs.c | 352 |
1 files changed, 263 insertions, 89 deletions
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 38512bcd2e98..b8323aa7b543 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c | |||
@@ -395,6 +395,9 @@ mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, | |||
395 | dprintk("%s:Begin lo %p\n", __func__, lo); | 395 | dprintk("%s:Begin lo %p\n", __func__, lo); |
396 | 396 | ||
397 | if (list_empty(&lo->plh_segs)) { | 397 | if (list_empty(&lo->plh_segs)) { |
398 | /* Reset MDS Threshold I/O counters */ | ||
399 | NFS_I(lo->plh_inode)->write_io = 0; | ||
400 | NFS_I(lo->plh_inode)->read_io = 0; | ||
398 | if (!test_and_set_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags)) | 401 | if (!test_and_set_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags)) |
399 | put_layout_hdr_locked(lo); | 402 | put_layout_hdr_locked(lo); |
400 | return 0; | 403 | return 0; |
@@ -455,6 +458,7 @@ pnfs_destroy_layout(struct nfs_inode *nfsi) | |||
455 | spin_unlock(&nfsi->vfs_inode.i_lock); | 458 | spin_unlock(&nfsi->vfs_inode.i_lock); |
456 | pnfs_free_lseg_list(&tmp_list); | 459 | pnfs_free_lseg_list(&tmp_list); |
457 | } | 460 | } |
461 | EXPORT_SYMBOL_GPL(pnfs_destroy_layout); | ||
458 | 462 | ||
459 | /* | 463 | /* |
460 | * Called by the state manager to remove all layouts established under an | 464 | * Called by the state manager to remove all layouts established under an |
@@ -692,6 +696,7 @@ out: | |||
692 | dprintk("<-- %s status: %d\n", __func__, status); | 696 | dprintk("<-- %s status: %d\n", __func__, status); |
693 | return status; | 697 | return status; |
694 | } | 698 | } |
699 | EXPORT_SYMBOL_GPL(_pnfs_return_layout); | ||
695 | 700 | ||
696 | bool pnfs_roc(struct inode *ino) | 701 | bool pnfs_roc(struct inode *ino) |
697 | { | 702 | { |
@@ -931,6 +936,81 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo, | |||
931 | } | 936 | } |
932 | 937 | ||
933 | /* | 938 | /* |
939 | * Use mdsthreshold hints set at each OPEN to determine if I/O should go | ||
940 | * to the MDS or over pNFS | ||
941 | * | ||
942 | * The nfs_inode read_io and write_io fields are cumulative counters reset | ||
943 | * when there are no layout segments. Note that in pnfs_update_layout iomode | ||
944 | * is set to IOMODE_READ for a READ request, and set to IOMODE_RW for a | ||
945 | * WRITE request. | ||
946 | * | ||
947 | * A return of true means use MDS I/O. | ||
948 | * | ||
949 | * From RFC 5661: | ||
950 | * If a file's size is smaller than the file size threshold, data accesses | ||
951 | * SHOULD be sent to the metadata server. If an I/O request has a length that | ||
952 | * is below the I/O size threshold, the I/O SHOULD be sent to the metadata | ||
953 | * server. If both file size and I/O size are provided, the client SHOULD | ||
954 | * reach or exceed both thresholds before sending its read or write | ||
955 | * requests to the data server. | ||
956 | */ | ||
957 | static bool pnfs_within_mdsthreshold(struct nfs_open_context *ctx, | ||
958 | struct inode *ino, int iomode) | ||
959 | { | ||
960 | struct nfs4_threshold *t = ctx->mdsthreshold; | ||
961 | struct nfs_inode *nfsi = NFS_I(ino); | ||
962 | loff_t fsize = i_size_read(ino); | ||
963 | bool size = false, size_set = false, io = false, io_set = false, ret = false; | ||
964 | |||
965 | if (t == NULL) | ||
966 | return ret; | ||
967 | |||
968 | dprintk("%s bm=0x%x rd_sz=%llu wr_sz=%llu rd_io=%llu wr_io=%llu\n", | ||
969 | __func__, t->bm, t->rd_sz, t->wr_sz, t->rd_io_sz, t->wr_io_sz); | ||
970 | |||
971 | switch (iomode) { | ||
972 | case IOMODE_READ: | ||
973 | if (t->bm & THRESHOLD_RD) { | ||
974 | dprintk("%s fsize %llu\n", __func__, fsize); | ||
975 | size_set = true; | ||
976 | if (fsize < t->rd_sz) | ||
977 | size = true; | ||
978 | } | ||
979 | if (t->bm & THRESHOLD_RD_IO) { | ||
980 | dprintk("%s nfsi->read_io %llu\n", __func__, | ||
981 | nfsi->read_io); | ||
982 | io_set = true; | ||
983 | if (nfsi->read_io < t->rd_io_sz) | ||
984 | io = true; | ||
985 | } | ||
986 | break; | ||
987 | case IOMODE_RW: | ||
988 | if (t->bm & THRESHOLD_WR) { | ||
989 | dprintk("%s fsize %llu\n", __func__, fsize); | ||
990 | size_set = true; | ||
991 | if (fsize < t->wr_sz) | ||
992 | size = true; | ||
993 | } | ||
994 | if (t->bm & THRESHOLD_WR_IO) { | ||
995 | dprintk("%s nfsi->write_io %llu\n", __func__, | ||
996 | nfsi->write_io); | ||
997 | io_set = true; | ||
998 | if (nfsi->write_io < t->wr_io_sz) | ||
999 | io = true; | ||
1000 | } | ||
1001 | break; | ||
1002 | } | ||
1003 | if (size_set && io_set) { | ||
1004 | if (size && io) | ||
1005 | ret = true; | ||
1006 | } else if (size || io) | ||
1007 | ret = true; | ||
1008 | |||
1009 | dprintk("<-- %s size %d io %d ret %d\n", __func__, size, io, ret); | ||
1010 | return ret; | ||
1011 | } | ||
1012 | |||
1013 | /* | ||
934 | * Layout segment is retrieved from the server if not cached. | 1014 | * Layout segment is retrieved from the server if not cached. |
935 | * The appropriate layout segment is referenced and returned to the caller. | 1015 | * The appropriate layout segment is referenced and returned to the caller. |
936 | */ | 1016 | */ |
@@ -957,6 +1037,10 @@ pnfs_update_layout(struct inode *ino, | |||
957 | 1037 | ||
958 | if (!pnfs_enabled_sb(NFS_SERVER(ino))) | 1038 | if (!pnfs_enabled_sb(NFS_SERVER(ino))) |
959 | return NULL; | 1039 | return NULL; |
1040 | |||
1041 | if (pnfs_within_mdsthreshold(ctx, ino, iomode)) | ||
1042 | return NULL; | ||
1043 | |||
960 | spin_lock(&ino->i_lock); | 1044 | spin_lock(&ino->i_lock); |
961 | lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags); | 1045 | lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags); |
962 | if (lo == NULL) { | 1046 | if (lo == NULL) { |
@@ -1082,6 +1166,10 @@ pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *r | |||
1082 | { | 1166 | { |
1083 | BUG_ON(pgio->pg_lseg != NULL); | 1167 | BUG_ON(pgio->pg_lseg != NULL); |
1084 | 1168 | ||
1169 | if (req->wb_offset != req->wb_pgbase) { | ||
1170 | nfs_pageio_reset_read_mds(pgio); | ||
1171 | return; | ||
1172 | } | ||
1085 | pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, | 1173 | pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, |
1086 | req->wb_context, | 1174 | req->wb_context, |
1087 | req_offset(req), | 1175 | req_offset(req), |
@@ -1100,6 +1188,10 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page * | |||
1100 | { | 1188 | { |
1101 | BUG_ON(pgio->pg_lseg != NULL); | 1189 | BUG_ON(pgio->pg_lseg != NULL); |
1102 | 1190 | ||
1191 | if (req->wb_offset != req->wb_pgbase) { | ||
1192 | nfs_pageio_reset_write_mds(pgio); | ||
1193 | return; | ||
1194 | } | ||
1103 | pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, | 1195 | pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, |
1104 | req->wb_context, | 1196 | req->wb_context, |
1105 | req_offset(req), | 1197 | req_offset(req), |
@@ -1113,26 +1205,31 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page * | |||
1113 | EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write); | 1205 | EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write); |
1114 | 1206 | ||
1115 | bool | 1207 | bool |
1116 | pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode) | 1208 | pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode, |
1209 | const struct nfs_pgio_completion_ops *compl_ops) | ||
1117 | { | 1210 | { |
1118 | struct nfs_server *server = NFS_SERVER(inode); | 1211 | struct nfs_server *server = NFS_SERVER(inode); |
1119 | struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld; | 1212 | struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld; |
1120 | 1213 | ||
1121 | if (ld == NULL) | 1214 | if (ld == NULL) |
1122 | return false; | 1215 | return false; |
1123 | nfs_pageio_init(pgio, inode, ld->pg_read_ops, server->rsize, 0); | 1216 | nfs_pageio_init(pgio, inode, ld->pg_read_ops, compl_ops, |
1217 | server->rsize, 0); | ||
1124 | return true; | 1218 | return true; |
1125 | } | 1219 | } |
1126 | 1220 | ||
1127 | bool | 1221 | bool |
1128 | pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags) | 1222 | pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, |
1223 | int ioflags, | ||
1224 | const struct nfs_pgio_completion_ops *compl_ops) | ||
1129 | { | 1225 | { |
1130 | struct nfs_server *server = NFS_SERVER(inode); | 1226 | struct nfs_server *server = NFS_SERVER(inode); |
1131 | struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld; | 1227 | struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld; |
1132 | 1228 | ||
1133 | if (ld == NULL) | 1229 | if (ld == NULL) |
1134 | return false; | 1230 | return false; |
1135 | nfs_pageio_init(pgio, inode, ld->pg_write_ops, server->wsize, ioflags); | 1231 | nfs_pageio_init(pgio, inode, ld->pg_write_ops, compl_ops, |
1232 | server->wsize, ioflags); | ||
1136 | return true; | 1233 | return true; |
1137 | } | 1234 | } |
1138 | 1235 | ||
@@ -1162,13 +1259,15 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, | |||
1162 | } | 1259 | } |
1163 | EXPORT_SYMBOL_GPL(pnfs_generic_pg_test); | 1260 | EXPORT_SYMBOL_GPL(pnfs_generic_pg_test); |
1164 | 1261 | ||
1165 | static int pnfs_write_done_resend_to_mds(struct inode *inode, struct list_head *head) | 1262 | int pnfs_write_done_resend_to_mds(struct inode *inode, |
1263 | struct list_head *head, | ||
1264 | const struct nfs_pgio_completion_ops *compl_ops) | ||
1166 | { | 1265 | { |
1167 | struct nfs_pageio_descriptor pgio; | 1266 | struct nfs_pageio_descriptor pgio; |
1168 | LIST_HEAD(failed); | 1267 | LIST_HEAD(failed); |
1169 | 1268 | ||
1170 | /* Resend all requests through the MDS */ | 1269 | /* Resend all requests through the MDS */ |
1171 | nfs_pageio_init_write_mds(&pgio, inode, FLUSH_STABLE); | 1270 | nfs_pageio_init_write_mds(&pgio, inode, FLUSH_STABLE, compl_ops); |
1172 | while (!list_empty(head)) { | 1271 | while (!list_empty(head)) { |
1173 | struct nfs_page *req = nfs_list_entry(head->next); | 1272 | struct nfs_page *req = nfs_list_entry(head->next); |
1174 | 1273 | ||
@@ -1188,30 +1287,37 @@ static int pnfs_write_done_resend_to_mds(struct inode *inode, struct list_head * | |||
1188 | } | 1287 | } |
1189 | return 0; | 1288 | return 0; |
1190 | } | 1289 | } |
1290 | EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds); | ||
1291 | |||
1292 | static void pnfs_ld_handle_write_error(struct nfs_write_data *data) | ||
1293 | { | ||
1294 | struct nfs_pgio_header *hdr = data->header; | ||
1295 | |||
1296 | dprintk("pnfs write error = %d\n", hdr->pnfs_error); | ||
1297 | if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags & | ||
1298 | PNFS_LAYOUTRET_ON_ERROR) { | ||
1299 | clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags); | ||
1300 | pnfs_return_layout(hdr->inode); | ||
1301 | } | ||
1302 | if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) | ||
1303 | data->task.tk_status = pnfs_write_done_resend_to_mds(hdr->inode, | ||
1304 | &hdr->pages, | ||
1305 | hdr->completion_ops); | ||
1306 | } | ||
1191 | 1307 | ||
1192 | /* | 1308 | /* |
1193 | * Called by non rpc-based layout drivers | 1309 | * Called by non rpc-based layout drivers |
1194 | */ | 1310 | */ |
1195 | void pnfs_ld_write_done(struct nfs_write_data *data) | 1311 | void pnfs_ld_write_done(struct nfs_write_data *data) |
1196 | { | 1312 | { |
1197 | if (likely(!data->pnfs_error)) { | 1313 | struct nfs_pgio_header *hdr = data->header; |
1314 | |||
1315 | if (!hdr->pnfs_error) { | ||
1198 | pnfs_set_layoutcommit(data); | 1316 | pnfs_set_layoutcommit(data); |
1199 | data->mds_ops->rpc_call_done(&data->task, data); | 1317 | hdr->mds_ops->rpc_call_done(&data->task, data); |
1200 | } else { | 1318 | } else |
1201 | dprintk("pnfs write error = %d\n", data->pnfs_error); | 1319 | pnfs_ld_handle_write_error(data); |
1202 | if (NFS_SERVER(data->inode)->pnfs_curr_ld->flags & | 1320 | hdr->mds_ops->rpc_release(data); |
1203 | PNFS_LAYOUTRET_ON_ERROR) { | ||
1204 | /* Don't lo_commit on error, Server will needs to | ||
1205 | * preform a file recovery. | ||
1206 | */ | ||
1207 | clear_bit(NFS_INO_LAYOUTCOMMIT, | ||
1208 | &NFS_I(data->inode)->flags); | ||
1209 | pnfs_return_layout(data->inode); | ||
1210 | } | ||
1211 | data->task.tk_status = pnfs_write_done_resend_to_mds(data->inode, &data->pages); | ||
1212 | } | ||
1213 | put_lseg(data->lseg); | ||
1214 | data->mds_ops->rpc_release(data); | ||
1215 | } | 1321 | } |
1216 | EXPORT_SYMBOL_GPL(pnfs_ld_write_done); | 1322 | EXPORT_SYMBOL_GPL(pnfs_ld_write_done); |
1217 | 1323 | ||
@@ -1219,12 +1325,13 @@ static void | |||
1219 | pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, | 1325 | pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, |
1220 | struct nfs_write_data *data) | 1326 | struct nfs_write_data *data) |
1221 | { | 1327 | { |
1222 | list_splice_tail_init(&data->pages, &desc->pg_list); | 1328 | struct nfs_pgio_header *hdr = data->header; |
1223 | if (data->req && list_empty(&data->req->wb_list)) | 1329 | |
1224 | nfs_list_add_request(data->req, &desc->pg_list); | 1330 | if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { |
1225 | nfs_pageio_reset_write_mds(desc); | 1331 | list_splice_tail_init(&hdr->pages, &desc->pg_list); |
1226 | desc->pg_recoalesce = 1; | 1332 | nfs_pageio_reset_write_mds(desc); |
1227 | put_lseg(data->lseg); | 1333 | desc->pg_recoalesce = 1; |
1334 | } | ||
1228 | nfs_writedata_release(data); | 1335 | nfs_writedata_release(data); |
1229 | } | 1336 | } |
1230 | 1337 | ||
@@ -1234,23 +1341,18 @@ pnfs_try_to_write_data(struct nfs_write_data *wdata, | |||
1234 | struct pnfs_layout_segment *lseg, | 1341 | struct pnfs_layout_segment *lseg, |
1235 | int how) | 1342 | int how) |
1236 | { | 1343 | { |
1237 | struct inode *inode = wdata->inode; | 1344 | struct nfs_pgio_header *hdr = wdata->header; |
1345 | struct inode *inode = hdr->inode; | ||
1238 | enum pnfs_try_status trypnfs; | 1346 | enum pnfs_try_status trypnfs; |
1239 | struct nfs_server *nfss = NFS_SERVER(inode); | 1347 | struct nfs_server *nfss = NFS_SERVER(inode); |
1240 | 1348 | ||
1241 | wdata->mds_ops = call_ops; | 1349 | hdr->mds_ops = call_ops; |
1242 | wdata->lseg = get_lseg(lseg); | ||
1243 | 1350 | ||
1244 | dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__, | 1351 | dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__, |
1245 | inode->i_ino, wdata->args.count, wdata->args.offset, how); | 1352 | inode->i_ino, wdata->args.count, wdata->args.offset, how); |
1246 | |||
1247 | trypnfs = nfss->pnfs_curr_ld->write_pagelist(wdata, how); | 1353 | trypnfs = nfss->pnfs_curr_ld->write_pagelist(wdata, how); |
1248 | if (trypnfs == PNFS_NOT_ATTEMPTED) { | 1354 | if (trypnfs != PNFS_NOT_ATTEMPTED) |
1249 | put_lseg(wdata->lseg); | ||
1250 | wdata->lseg = NULL; | ||
1251 | } else | ||
1252 | nfs_inc_stats(inode, NFSIOS_PNFS_WRITE); | 1355 | nfs_inc_stats(inode, NFSIOS_PNFS_WRITE); |
1253 | |||
1254 | dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs); | 1356 | dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs); |
1255 | return trypnfs; | 1357 | return trypnfs; |
1256 | } | 1358 | } |
@@ -1266,7 +1368,7 @@ pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *he | |||
1266 | while (!list_empty(head)) { | 1368 | while (!list_empty(head)) { |
1267 | enum pnfs_try_status trypnfs; | 1369 | enum pnfs_try_status trypnfs; |
1268 | 1370 | ||
1269 | data = list_entry(head->next, struct nfs_write_data, list); | 1371 | data = list_first_entry(head, struct nfs_write_data, list); |
1270 | list_del_init(&data->list); | 1372 | list_del_init(&data->list); |
1271 | 1373 | ||
1272 | trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how); | 1374 | trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how); |
@@ -1276,43 +1378,82 @@ pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *he | |||
1276 | put_lseg(lseg); | 1378 | put_lseg(lseg); |
1277 | } | 1379 | } |
1278 | 1380 | ||
1381 | static void pnfs_writehdr_free(struct nfs_pgio_header *hdr) | ||
1382 | { | ||
1383 | put_lseg(hdr->lseg); | ||
1384 | nfs_writehdr_free(hdr); | ||
1385 | } | ||
1386 | |||
1279 | int | 1387 | int |
1280 | pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) | 1388 | pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) |
1281 | { | 1389 | { |
1282 | LIST_HEAD(head); | 1390 | struct nfs_write_header *whdr; |
1391 | struct nfs_pgio_header *hdr; | ||
1283 | int ret; | 1392 | int ret; |
1284 | 1393 | ||
1285 | ret = nfs_generic_flush(desc, &head); | 1394 | whdr = nfs_writehdr_alloc(); |
1286 | if (ret != 0) { | 1395 | if (!whdr) { |
1396 | desc->pg_completion_ops->error_cleanup(&desc->pg_list); | ||
1287 | put_lseg(desc->pg_lseg); | 1397 | put_lseg(desc->pg_lseg); |
1288 | desc->pg_lseg = NULL; | 1398 | desc->pg_lseg = NULL; |
1289 | return ret; | 1399 | return -ENOMEM; |
1290 | } | 1400 | } |
1291 | pnfs_do_multiple_writes(desc, &head, desc->pg_ioflags); | 1401 | hdr = &whdr->header; |
1292 | return 0; | 1402 | nfs_pgheader_init(desc, hdr, pnfs_writehdr_free); |
1403 | hdr->lseg = get_lseg(desc->pg_lseg); | ||
1404 | atomic_inc(&hdr->refcnt); | ||
1405 | ret = nfs_generic_flush(desc, hdr); | ||
1406 | if (ret != 0) { | ||
1407 | put_lseg(desc->pg_lseg); | ||
1408 | desc->pg_lseg = NULL; | ||
1409 | } else | ||
1410 | pnfs_do_multiple_writes(desc, &hdr->rpc_list, desc->pg_ioflags); | ||
1411 | if (atomic_dec_and_test(&hdr->refcnt)) | ||
1412 | hdr->completion_ops->completion(hdr); | ||
1413 | return ret; | ||
1293 | } | 1414 | } |
1294 | EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages); | 1415 | EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages); |
1295 | 1416 | ||
1296 | static void pnfs_ld_handle_read_error(struct nfs_read_data *data) | 1417 | int pnfs_read_done_resend_to_mds(struct inode *inode, |
1418 | struct list_head *head, | ||
1419 | const struct nfs_pgio_completion_ops *compl_ops) | ||
1297 | { | 1420 | { |
1298 | struct nfs_pageio_descriptor pgio; | 1421 | struct nfs_pageio_descriptor pgio; |
1422 | LIST_HEAD(failed); | ||
1299 | 1423 | ||
1300 | put_lseg(data->lseg); | 1424 | /* Resend all requests through the MDS */ |
1301 | data->lseg = NULL; | 1425 | nfs_pageio_init_read_mds(&pgio, inode, compl_ops); |
1302 | dprintk("pnfs write error = %d\n", data->pnfs_error); | 1426 | while (!list_empty(head)) { |
1303 | if (NFS_SERVER(data->inode)->pnfs_curr_ld->flags & | 1427 | struct nfs_page *req = nfs_list_entry(head->next); |
1304 | PNFS_LAYOUTRET_ON_ERROR) | ||
1305 | pnfs_return_layout(data->inode); | ||
1306 | |||
1307 | nfs_pageio_init_read_mds(&pgio, data->inode); | ||
1308 | |||
1309 | while (!list_empty(&data->pages)) { | ||
1310 | struct nfs_page *req = nfs_list_entry(data->pages.next); | ||
1311 | 1428 | ||
1312 | nfs_list_remove_request(req); | 1429 | nfs_list_remove_request(req); |
1313 | nfs_pageio_add_request(&pgio, req); | 1430 | if (!nfs_pageio_add_request(&pgio, req)) |
1431 | nfs_list_add_request(req, &failed); | ||
1314 | } | 1432 | } |
1315 | nfs_pageio_complete(&pgio); | 1433 | nfs_pageio_complete(&pgio); |
1434 | |||
1435 | if (!list_empty(&failed)) { | ||
1436 | list_move(&failed, head); | ||
1437 | return -EIO; | ||
1438 | } | ||
1439 | return 0; | ||
1440 | } | ||
1441 | EXPORT_SYMBOL_GPL(pnfs_read_done_resend_to_mds); | ||
1442 | |||
1443 | static void pnfs_ld_handle_read_error(struct nfs_read_data *data) | ||
1444 | { | ||
1445 | struct nfs_pgio_header *hdr = data->header; | ||
1446 | |||
1447 | dprintk("pnfs read error = %d\n", hdr->pnfs_error); | ||
1448 | if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags & | ||
1449 | PNFS_LAYOUTRET_ON_ERROR) { | ||
1450 | clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags); | ||
1451 | pnfs_return_layout(hdr->inode); | ||
1452 | } | ||
1453 | if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) | ||
1454 | data->task.tk_status = pnfs_read_done_resend_to_mds(hdr->inode, | ||
1455 | &hdr->pages, | ||
1456 | hdr->completion_ops); | ||
1316 | } | 1457 | } |
1317 | 1458 | ||
1318 | /* | 1459 | /* |
@@ -1320,13 +1461,14 @@ static void pnfs_ld_handle_read_error(struct nfs_read_data *data) | |||
1320 | */ | 1461 | */ |
1321 | void pnfs_ld_read_done(struct nfs_read_data *data) | 1462 | void pnfs_ld_read_done(struct nfs_read_data *data) |
1322 | { | 1463 | { |
1323 | if (likely(!data->pnfs_error)) { | 1464 | struct nfs_pgio_header *hdr = data->header; |
1465 | |||
1466 | if (likely(!hdr->pnfs_error)) { | ||
1324 | __nfs4_read_done_cb(data); | 1467 | __nfs4_read_done_cb(data); |
1325 | data->mds_ops->rpc_call_done(&data->task, data); | 1468 | hdr->mds_ops->rpc_call_done(&data->task, data); |
1326 | } else | 1469 | } else |
1327 | pnfs_ld_handle_read_error(data); | 1470 | pnfs_ld_handle_read_error(data); |
1328 | put_lseg(data->lseg); | 1471 | hdr->mds_ops->rpc_release(data); |
1329 | data->mds_ops->rpc_release(data); | ||
1330 | } | 1472 | } |
1331 | EXPORT_SYMBOL_GPL(pnfs_ld_read_done); | 1473 | EXPORT_SYMBOL_GPL(pnfs_ld_read_done); |
1332 | 1474 | ||
@@ -1334,11 +1476,13 @@ static void | |||
1334 | pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, | 1476 | pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, |
1335 | struct nfs_read_data *data) | 1477 | struct nfs_read_data *data) |
1336 | { | 1478 | { |
1337 | list_splice_tail_init(&data->pages, &desc->pg_list); | 1479 | struct nfs_pgio_header *hdr = data->header; |
1338 | if (data->req && list_empty(&data->req->wb_list)) | 1480 | |
1339 | nfs_list_add_request(data->req, &desc->pg_list); | 1481 | if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { |
1340 | nfs_pageio_reset_read_mds(desc); | 1482 | list_splice_tail_init(&hdr->pages, &desc->pg_list); |
1341 | desc->pg_recoalesce = 1; | 1483 | nfs_pageio_reset_read_mds(desc); |
1484 | desc->pg_recoalesce = 1; | ||
1485 | } | ||
1342 | nfs_readdata_release(data); | 1486 | nfs_readdata_release(data); |
1343 | } | 1487 | } |
1344 | 1488 | ||
@@ -1350,23 +1494,19 @@ pnfs_try_to_read_data(struct nfs_read_data *rdata, | |||
1350 | const struct rpc_call_ops *call_ops, | 1494 | const struct rpc_call_ops *call_ops, |
1351 | struct pnfs_layout_segment *lseg) | 1495 | struct pnfs_layout_segment *lseg) |
1352 | { | 1496 | { |
1353 | struct inode *inode = rdata->inode; | 1497 | struct nfs_pgio_header *hdr = rdata->header; |
1498 | struct inode *inode = hdr->inode; | ||
1354 | struct nfs_server *nfss = NFS_SERVER(inode); | 1499 | struct nfs_server *nfss = NFS_SERVER(inode); |
1355 | enum pnfs_try_status trypnfs; | 1500 | enum pnfs_try_status trypnfs; |
1356 | 1501 | ||
1357 | rdata->mds_ops = call_ops; | 1502 | hdr->mds_ops = call_ops; |
1358 | rdata->lseg = get_lseg(lseg); | ||
1359 | 1503 | ||
1360 | dprintk("%s: Reading ino:%lu %u@%llu\n", | 1504 | dprintk("%s: Reading ino:%lu %u@%llu\n", |
1361 | __func__, inode->i_ino, rdata->args.count, rdata->args.offset); | 1505 | __func__, inode->i_ino, rdata->args.count, rdata->args.offset); |
1362 | 1506 | ||
1363 | trypnfs = nfss->pnfs_curr_ld->read_pagelist(rdata); | 1507 | trypnfs = nfss->pnfs_curr_ld->read_pagelist(rdata); |
1364 | if (trypnfs == PNFS_NOT_ATTEMPTED) { | 1508 | if (trypnfs != PNFS_NOT_ATTEMPTED) |
1365 | put_lseg(rdata->lseg); | ||
1366 | rdata->lseg = NULL; | ||
1367 | } else { | ||
1368 | nfs_inc_stats(inode, NFSIOS_PNFS_READ); | 1509 | nfs_inc_stats(inode, NFSIOS_PNFS_READ); |
1369 | } | ||
1370 | dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs); | 1510 | dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs); |
1371 | return trypnfs; | 1511 | return trypnfs; |
1372 | } | 1512 | } |
@@ -1382,7 +1522,7 @@ pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *hea | |||
1382 | while (!list_empty(head)) { | 1522 | while (!list_empty(head)) { |
1383 | enum pnfs_try_status trypnfs; | 1523 | enum pnfs_try_status trypnfs; |
1384 | 1524 | ||
1385 | data = list_entry(head->next, struct nfs_read_data, list); | 1525 | data = list_first_entry(head, struct nfs_read_data, list); |
1386 | list_del_init(&data->list); | 1526 | list_del_init(&data->list); |
1387 | 1527 | ||
1388 | trypnfs = pnfs_try_to_read_data(data, call_ops, lseg); | 1528 | trypnfs = pnfs_try_to_read_data(data, call_ops, lseg); |
@@ -1392,20 +1532,40 @@ pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *hea | |||
1392 | put_lseg(lseg); | 1532 | put_lseg(lseg); |
1393 | } | 1533 | } |
1394 | 1534 | ||
1535 | static void pnfs_readhdr_free(struct nfs_pgio_header *hdr) | ||
1536 | { | ||
1537 | put_lseg(hdr->lseg); | ||
1538 | nfs_readhdr_free(hdr); | ||
1539 | } | ||
1540 | |||
1395 | int | 1541 | int |
1396 | pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) | 1542 | pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) |
1397 | { | 1543 | { |
1398 | LIST_HEAD(head); | 1544 | struct nfs_read_header *rhdr; |
1545 | struct nfs_pgio_header *hdr; | ||
1399 | int ret; | 1546 | int ret; |
1400 | 1547 | ||
1401 | ret = nfs_generic_pagein(desc, &head); | 1548 | rhdr = nfs_readhdr_alloc(); |
1402 | if (ret != 0) { | 1549 | if (!rhdr) { |
1550 | desc->pg_completion_ops->error_cleanup(&desc->pg_list); | ||
1551 | ret = -ENOMEM; | ||
1403 | put_lseg(desc->pg_lseg); | 1552 | put_lseg(desc->pg_lseg); |
1404 | desc->pg_lseg = NULL; | 1553 | desc->pg_lseg = NULL; |
1405 | return ret; | 1554 | return ret; |
1406 | } | 1555 | } |
1407 | pnfs_do_multiple_reads(desc, &head); | 1556 | hdr = &rhdr->header; |
1408 | return 0; | 1557 | nfs_pgheader_init(desc, hdr, pnfs_readhdr_free); |
1558 | hdr->lseg = get_lseg(desc->pg_lseg); | ||
1559 | atomic_inc(&hdr->refcnt); | ||
1560 | ret = nfs_generic_pagein(desc, hdr); | ||
1561 | if (ret != 0) { | ||
1562 | put_lseg(desc->pg_lseg); | ||
1563 | desc->pg_lseg = NULL; | ||
1564 | } else | ||
1565 | pnfs_do_multiple_reads(desc, &hdr->rpc_list); | ||
1566 | if (atomic_dec_and_test(&hdr->refcnt)) | ||
1567 | hdr->completion_ops->completion(hdr); | ||
1568 | return ret; | ||
1409 | } | 1569 | } |
1410 | EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages); | 1570 | EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages); |
1411 | 1571 | ||
@@ -1438,30 +1598,32 @@ EXPORT_SYMBOL_GPL(pnfs_set_lo_fail); | |||
1438 | void | 1598 | void |
1439 | pnfs_set_layoutcommit(struct nfs_write_data *wdata) | 1599 | pnfs_set_layoutcommit(struct nfs_write_data *wdata) |
1440 | { | 1600 | { |
1441 | struct nfs_inode *nfsi = NFS_I(wdata->inode); | 1601 | struct nfs_pgio_header *hdr = wdata->header; |
1602 | struct inode *inode = hdr->inode; | ||
1603 | struct nfs_inode *nfsi = NFS_I(inode); | ||
1442 | loff_t end_pos = wdata->mds_offset + wdata->res.count; | 1604 | loff_t end_pos = wdata->mds_offset + wdata->res.count; |
1443 | bool mark_as_dirty = false; | 1605 | bool mark_as_dirty = false; |
1444 | 1606 | ||
1445 | spin_lock(&nfsi->vfs_inode.i_lock); | 1607 | spin_lock(&inode->i_lock); |
1446 | if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) { | 1608 | if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) { |
1447 | mark_as_dirty = true; | 1609 | mark_as_dirty = true; |
1448 | dprintk("%s: Set layoutcommit for inode %lu ", | 1610 | dprintk("%s: Set layoutcommit for inode %lu ", |
1449 | __func__, wdata->inode->i_ino); | 1611 | __func__, inode->i_ino); |
1450 | } | 1612 | } |
1451 | if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &wdata->lseg->pls_flags)) { | 1613 | if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &hdr->lseg->pls_flags)) { |
1452 | /* references matched in nfs4_layoutcommit_release */ | 1614 | /* references matched in nfs4_layoutcommit_release */ |
1453 | get_lseg(wdata->lseg); | 1615 | get_lseg(hdr->lseg); |
1454 | } | 1616 | } |
1455 | if (end_pos > nfsi->layout->plh_lwb) | 1617 | if (end_pos > nfsi->layout->plh_lwb) |
1456 | nfsi->layout->plh_lwb = end_pos; | 1618 | nfsi->layout->plh_lwb = end_pos; |
1457 | spin_unlock(&nfsi->vfs_inode.i_lock); | 1619 | spin_unlock(&inode->i_lock); |
1458 | dprintk("%s: lseg %p end_pos %llu\n", | 1620 | dprintk("%s: lseg %p end_pos %llu\n", |
1459 | __func__, wdata->lseg, nfsi->layout->plh_lwb); | 1621 | __func__, hdr->lseg, nfsi->layout->plh_lwb); |
1460 | 1622 | ||
1461 | /* if pnfs_layoutcommit_inode() runs between inode locks, the next one | 1623 | /* if pnfs_layoutcommit_inode() runs between inode locks, the next one |
1462 | * will be a noop because NFS_INO_LAYOUTCOMMIT will not be set */ | 1624 | * will be a noop because NFS_INO_LAYOUTCOMMIT will not be set */ |
1463 | if (mark_as_dirty) | 1625 | if (mark_as_dirty) |
1464 | mark_inode_dirty_sync(wdata->inode); | 1626 | mark_inode_dirty_sync(inode); |
1465 | } | 1627 | } |
1466 | EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit); | 1628 | EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit); |
1467 | 1629 | ||
@@ -1550,3 +1712,15 @@ out_free: | |||
1550 | kfree(data); | 1712 | kfree(data); |
1551 | goto out; | 1713 | goto out; |
1552 | } | 1714 | } |
1715 | |||
1716 | struct nfs4_threshold *pnfs_mdsthreshold_alloc(void) | ||
1717 | { | ||
1718 | struct nfs4_threshold *thp; | ||
1719 | |||
1720 | thp = kzalloc(sizeof(*thp), GFP_NOFS); | ||
1721 | if (!thp) { | ||
1722 | dprintk("%s mdsthreshold allocation failed\n", __func__); | ||
1723 | return NULL; | ||
1724 | } | ||
1725 | return thp; | ||
1726 | } | ||